diff --git a/envs/.env.taginfo.example b/envs/.env.taginfo.example
index c1d3ffb1..75ae1516 100644
--- a/envs/.env.taginfo.example
+++ b/envs/.env.taginfo.example
@@ -1,15 +1,14 @@
-#######################################
-# Environment variables for taginfo database
-#######################################
-URL_PLANET_FILE_STATE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/state.txt
-URL_HISTORY_PLANET_FILE_STATE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/state.txt
-URL_PLANET_FILE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/planet-200526_0000.osm.pbf
-URL_HISTORY_PLANET_FILE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/history-200526_0000.osh.pbf
-INSTANCE_URL=http://localhost:4567
-INSTANCE_NAME="OHM Taginfo"
-INSTANCE_DESCRIPTION="This is a taginfo test instance. Change this text in your taginfo-config.json."
-INSTANCE_ICON=https://www.openhistoricalmap.org/assets/ohm_logo-2d97749faddd5bd051d846ed1be0544aa7c92422b673eb43d2fd6edf3428986d.svg
-INSTANCE_CONTACT= "Anonymous"
+URL_PLANET_FILE_STATE=https://s3.amazonaws.com/osm-seed.org/planet/state.txt
+URL_HISTORY_PLANET_FILE_STATE=https://s3.amazonaws.com/osm-seed.org/planet/full-history/state.txt
+URL_PLANET_FILE='none'
+URL_HISTORY_PLANET_FILE='none'
+TIME_UPDATE_INTERVAL=7d
TAGINFO_PROJECT_REPO=https://github.com/OpenHistoricalMap/taginfo-projects.git
DOWNLOAD_DB='languages wiki'
-CREATE_DB='db projects chronology'
\ No newline at end of file
+CREATE_DB='db projects chronology'
+ENVIRONMENT=production
+INTERVAL_DOWNLOAD_DATA=7d
+FETCH_DB_FILES=false
+TAGINFO_DB_BASE_URL=https://osm-seed.s3.amazonaws.com/taginfo/staging
+AWS_S3_BUCKET=osm-seed
+
diff --git a/images/taginfo/Dockerfile b/images/taginfo/Dockerfile
index 137196b3..ca71fd1e 100644
--- a/images/taginfo/Dockerfile
+++ b/images/taginfo/Dockerfile
@@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y \
jq \
python3-pip \
wget \
+ cron \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
diff --git a/images/taginfo/README.md b/images/taginfo/README.md
index bf5cf5dd..2101528f 100644
--- a/images/taginfo/README.md
+++ b/images/taginfo/README.md
@@ -1,41 +1,43 @@
# OSM-Seed taginfo
-We build a docker container for taginfo software, the container will start the web service and also process required files to create databases.
+Docker container for taginfo that runs the web service and processes PBF files to create databases.
## Environment Variables
-All environment variables are located at [`.env.taginfo.example`](./../../envs/.env.taginfo.example), make a copy and name it as `.env.tagninfo` to use in osm-seed.
+Copy [`.env.taginfo.example`](./../../envs/.env.taginfo.example) to `.env.taginfo` and configure:
-- `URL_PLANET_FILE_STATE`: Url to the state file, that contains the URL for the latest planet PBF file. e.g [`state.txt`](https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/state.txt), This is no required in case you set the `URL_PLANET_FILE` env var
+### Planet Files
+- `URL_PLANET_FILE_STATE`: URL to state file with latest planet PBF URL (optional if `URL_PLANET_FILE` is set)
+- `URL_HISTORY_PLANET_FILE_STATE`: URL to state file with latest history PBF URL (optional if `URL_HISTORY_PLANET_FILE` is set)
+- `URL_PLANET_FILE`: Direct URL to planet PBF file
+- `URL_HISTORY_PLANET_FILE`: Direct URL to history PBF file
-- `URL_HISTORY_PLANET_FILE_STATE`: Url to the full history state file, that contains the URL for the latest full history planet PBF file. e.g [`state.txt`](https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/state.txt), This is no required in case you set the `URL_HISTORY_PLANET_FILE` env var
+### Database Configuration
+- `TAGINFO_DB_BASE_URL`: Base URL from which the SQLite database files are downloaded. The following files are fetched: projects-cache.db, selection.db, taginfo-chronology.db, taginfo-db.db, taginfo-history.db, taginfo-languages.db, taginfo-master.db, taginfo-projects.db, taginfo-wiki.db, taginfo-wikidata.db
+ - Example: `https://osm-seed.org.s3.amazonaws.com/taginfo`
-- `URL_PLANET_FILE`: URL for the latest planet PBF file.
-- `URL_HISTORY_PLANET_FILE`: URL for the latest full history planet PBF file.
-- `TIME_UPDATE_INTERVAL` Interval time to update the databases, e.g: `50m` = every 50 minutes, `20h` = every 20 hours , `5d` = every 5 days
+- `DOWNLOAD_DB`: Which databases to download (e.g., `languages wiki` or `languages wiki projects chronology`)
-The following env vars are required in the instance to update the values at: https://github.com/taginfo/taginfo/blob/master/taginfo-config-example.json
+- `CREATE_DB`: Which databases to create from PBF files (e.g., `db projects` or `db projects chronology`)
+ - `db` requires `URL_PLANET_FILE` or `URL_PLANET_FILE_STATE`
+ - `projects` requires `TAGINFO_PROJECT_REPO`
+ - `chronology` requires `URL_PLANET_FILE` or `URL_HISTORY_PLANET_FILE`
-- `OVERWRITE_CONFIG_URL`: config file with the values to update
+### Other
+- `TAGINFO_PROJECT_REPO`: Repository URL for taginfo projects (default: https://github.com/taginfo/taginfo-projects.git)
+- `OVERWRITE_CONFIG_URL`: URL to custom taginfo config JSON file
+- `INTERVAL_DOWNLOAD_DATA`: Time to wait between database syncs; the value is passed to `sleep` (e.g., `3600` for 1 hour, `7d` for 7 days)
-- `DOWNLOAD_DB`: Taginfo instances need 7 Sqlite databases to start up the web service, all of them can be downloaded from https://taginfo.openstreetmap.org/download. Or if you can download only some of them you can pass herec. e.g DOWNLOAD_DB=`languages wiki`, or DOWNLOAD_DB=`languages wiki projects chronology`.
-
-- `CREATE_DB`: If you want process you of data using the PBF files, you can pass the values. eg. CREATE_DB=`db projects` or CREATE_DB=`db projects chronology`.
- Note:
- - Value `db` require to pass `URL_PLANET_FILE` or `URL_PLANET_FILE_STATE`
- - Value `projects` require to pass `TAGINFO_PROJECT_REPO`
- - Value `chronology` require to pass `URL_PLANET_FILE` or `URL_HISTORY_PLANET_FILE`
-
-#### Running taginfo container
+## Running
```sh
- # Docker compose
- docker-compose run taginfo
-
- # Docker
- docker run \
- --env-file ./envs/.env.taginfo \
- -v ${PWD}/data/taginfo-data:/apps/data/ \
- --network osm-seed_default \
- -it osmseed-taginfo:v1
-```
\ No newline at end of file
+# Docker compose
+docker-compose run taginfo
+
+# Docker
+docker run \
+ --env-file ./envs/.env.taginfo \
+ -v ${PWD}/data/taginfo-data:/usr/src/app/data \
+ --network osm-seed_default \
+ -it osmseed-taginfo:v1
+```
diff --git a/images/taginfo/start.sh b/images/taginfo/start.sh
index 2c5d69c5..b56c52fe 100755
--- a/images/taginfo/start.sh
+++ b/images/taginfo/start.sh
@@ -49,8 +49,8 @@ process_data() {
mv $DATADIR/*.db $DATADIR/
mv $DATADIR/*/*.db $DATADIR/
# if AWS_S3_BUCKET is set upload data
- if ! aws s3 ls "s3://$AWS_S3_BUCKET/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then
- aws s3 sync $DATADIR/ s3://$AWS_S3_BUCKET/$ENVIRONMENT/ --exclude "*" --include "*.db"
+ if ! aws s3 ls "s3://$AWS_S3_BUCKET/taginfo" 2>&1 | grep -q 'An error occurred'; then
+ aws s3 sync $DATADIR/ s3://$AWS_S3_BUCKET/taginfo/ --exclude "*" --include "*.db"
fi
}
@@ -63,25 +63,60 @@ compress_files() {
}
download_db_files() {
- if ! aws s3 ls "s3://$AWS_S3_BUCKET/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then
- aws s3 sync "s3://$AWS_S3_BUCKET/$ENVIRONMENT/" "$DATADIR/"
- mv $DATADIR/*.db $DATADIR/
- mv $DATADIR/*/*.db $DATADIR/
- compress_files
+ local base_url=$1
+
+ if [ -z "$base_url" ]; then
+ echo "Error: URL base is required for download_db_files"
+ return 1
fi
+
+ # Ensure base_url ends with /
+ if [[ ! "$base_url" =~ /$ ]]; then
+ base_url="${base_url}/"
+ fi
+
+ # List of SQLite database files to download
+ local db_files=(
+ "projects-cache.db"
+ "selection.db"
+ "taginfo-chronology.db"
+ "taginfo-db.db"
+ "taginfo-history.db"
+ "taginfo-languages.db"
+ "taginfo-master.db"
+ "taginfo-projects.db"
+ "taginfo-wiki.db"
+ "taginfo-wikidata.db"
+ )
+
+ echo "Downloading SQLite database files from: $base_url"
+
+ for db_file in "${db_files[@]}"; do
+ local file_url="${base_url}${db_file}"
+ local output_path="${DATADIR}/${db_file}"
+
+ echo "Downloading: $db_file"
+ if wget -q --show-progress -O "$output_path" --no-check-certificate "$file_url"; then
+ echo "Successfully downloaded: $db_file"
+ else
+ echo "Warning: Failed to download $db_file from $file_url"
+ # Continue with other files even if one fails
+ fi
+ done
+
+ echo "Database files download completed"
}
sync_latest_db_version() {
while true; do
+ download_db_files "$TAGINFO_DB_BASE_URL"
sleep "$INTERVAL_DOWNLOAD_DATA"
- download_db_files
done
}
start_web() {
echo "Start...Taginfo web service"
- download_db_files
- cd $WORKDIR/taginfo/web && ./taginfo.rb & sync_latest_db_version
+ cd $WORKDIR/taginfo/web && ./taginfo.rb
}
ACTION=$1
@@ -89,6 +124,11 @@ ACTION=$1
[[ ! -z ${OVERWRITE_CONFIG_URL} ]] && wget $OVERWRITE_CONFIG_URL -O /usr/src/app/taginfo-config.json
updates_source_code
if [ "$ACTION" = "web" ]; then
+ # Start the periodic DB sync in the background when FETCH_DB_FILES is "true" (the default) and TAGINFO_DB_BASE_URL is set
+ if [ "${FETCH_DB_FILES:-true}" = "true" ] && [ ! -z "$TAGINFO_DB_BASE_URL" ]; then
+ sync_latest_db_version &
+ fi
+ # Start web server in foreground; the background sync loop (if started) keeps running alongside it
start_web
elif [ "$ACTION" = "data" ]; then
process_data
diff --git a/osm-seed/templates/taginfo/taginfo-configMap.yaml b/osm-seed/templates/taginfo/taginfo-configMap.yaml
index c7b76bdc..9691609a 100644
--- a/osm-seed/templates/taginfo/taginfo-configMap.yaml
+++ b/osm-seed/templates/taginfo/taginfo-configMap.yaml
@@ -15,5 +15,6 @@ data:
TIME_UPDATE_INTERVAL: {{ .Values.taginfo.env.TIME_UPDATE_INTERVAL | quote }}
AWS_S3_BUCKET: {{ .Values.taginfo.env.AWS_S3_BUCKET | quote }}
ENVIRONMENT: {{ .Values.taginfo.env.ENVIRONMENT | quote }}
- INTERVAL_DOWNLOAD_DATA: {{ .Values.taginfo.env.INTERVAL_DOWNLOAD_DATA | quote}}
+ INTERVAL_DOWNLOAD_DATA: {{ .Values.taginfo.env.INTERVAL_DOWNLOAD_DATA | quote }}
+ TAGINFO_DB_BASE_URL: {{ .Values.taginfo.env.TAGINFO_DB_BASE_URL | quote }}
{{- end }}
diff --git a/osm-seed/templates/taginfo/taginfo-cronJob.yaml b/osm-seed/templates/taginfo/taginfo-cronJob.yaml
index a693c8e8..95f069c9 100644
--- a/osm-seed/templates/taginfo/taginfo-cronJob.yaml
+++ b/osm-seed/templates/taginfo/taginfo-cronJob.yaml
@@ -19,7 +19,10 @@ spec:
spec:
template:
spec:
+ {{- if .Values.taginfo.serviceAccount.enabled }}
serviceAccountName: {{ .Values.taginfo.serviceAccount.name }}
+ automountServiceAccountToken: true
+ {{- end }}
containers:
- name: {{ .Release.Name }}-taginfo-job
image: "{{ .Values.taginfo.image.name }}:{{ .Values.taginfo.image.tag }}"
diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml
index e51e32c8..72bc53cf 100644
--- a/osm-seed/values.yaml
+++ b/osm-seed/values.yaml
@@ -1030,6 +1030,7 @@ taginfo:
ENVIRONMENT: development
AWS_S3_BUCKET: taginfo
INTERVAL_DOWNLOAD_DATA: 3600
+ TAGINFO_DB_BASE_URL: https://planet.openhistoricalmap.org.s3.amazonaws.com/taginfo
resources:
enabled: false
requests: