diff --git a/envs/.env.taginfo.example b/envs/.env.taginfo.example index c1d3ffb1..75ae1516 100644 --- a/envs/.env.taginfo.example +++ b/envs/.env.taginfo.example @@ -1,15 +1,14 @@ -####################################### -# Environment variables for taginfo database -####################################### -URL_PLANET_FILE_STATE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/state.txt -URL_HISTORY_PLANET_FILE_STATE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/state.txt -URL_PLANET_FILE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/planet-200526_0000.osm.pbf -URL_HISTORY_PLANET_FILE=https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/history-200526_0000.osh.pbf -INSTANCE_URL=http://localhost:4567 -INSTANCE_NAME="OHM Taginfo" -INSTANCE_DESCRIPTION="This is a taginfo test instance. Change this text in your taginfo-config.json." -INSTANCE_ICON=https://www.openhistoricalmap.org/assets/ohm_logo-2d97749faddd5bd051d846ed1be0544aa7c92422b673eb43d2fd6edf3428986d.svg -INSTANCE_CONTACT= "Anonymous" +URL_PLANET_FILE_STATE=https://s3.amazonaws.com/osm-seed.org/planet/state.txt +URL_HISTORY_PLANET_FILE_STATE=https://s3.amazonaws.com/osm-seed.org/planet/full-history/state.txt +URL_PLANET_FILE='none' +URL_HISTORY_PLANET_FILE='none' +TIME_UPDATE_INTERVAL=7d TAGINFO_PROJECT_REPO=https://github.com/OpenHistoricalMap/taginfo-projects.git DOWNLOAD_DB='languages wiki' -CREATE_DB='db projects chronology' \ No newline at end of file +CREATE_DB='db projects chronology' +ENVIRONMENT=production +INTERVAL_DOWNLOAD_DATA=7d +FETCH_DB_FILES=false +TAGINFO_DB_BASE_URL=https://osm-seed.s3.amazonaws.com/taginfo/staging +AWS_S3_BUCKET=osm-seed + diff --git a/images/taginfo/Dockerfile b/images/taginfo/Dockerfile index 137196b3..ca71fd1e 100644 --- a/images/taginfo/Dockerfile +++ b/images/taginfo/Dockerfile @@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y \ jq \ python3-pip \ wget \ + cron \ && apt-get clean \ && 
rm -rf /var/lib/apt/lists/* diff --git a/images/taginfo/README.md b/images/taginfo/README.md index bf5cf5dd..2101528f 100644 --- a/images/taginfo/README.md +++ b/images/taginfo/README.md @@ -1,41 +1,43 @@ # OSM-Seed taginfo -We build a docker container for taginfo software, the container will start the web service and also process required files to create databases. +Docker container for taginfo that runs the web service and processes PBF files to create databases. ## Environment Variables -All environment variables are located at [`.env.taginfo.example`](./../../envs/.env.taginfo.example), make a copy and name it as `.env.tagninfo` to use in osm-seed. +Copy [`.env.taginfo.example`](./../../envs/.env.taginfo.example) to `.env.taginfo` and configure: -- `URL_PLANET_FILE_STATE`: Url to the state file, that contains the URL for the latest planet PBF file. e.g [`state.txt`](https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/state.txt), This is no required in case you set the `URL_PLANET_FILE` env var +### Planet Files +- `URL_PLANET_FILE_STATE`: URL to state file with latest planet PBF URL (optional if `URL_PLANET_FILE` is set) +- `URL_HISTORY_PLANET_FILE_STATE`: URL to state file with latest history PBF URL (optional if `URL_HISTORY_PLANET_FILE` is set) +- `URL_PLANET_FILE`: Direct URL to planet PBF file +- `URL_HISTORY_PLANET_FILE`: Direct URL to history PBF file -- `URL_HISTORY_PLANET_FILE_STATE`: Url to the full history state file, that contains the URL for the latest full history planet PBF file. e.g [`state.txt`](https://planet.openhistoricalmap.org.s3.amazonaws.com/planet/full-history/state.txt), This is no required in case you set the `URL_HISTORY_PLANET_FILE` env var +### Database Configuration +- `TAGINFO_DB_BASE_URL`: Base URL to download SQLite database files. 
Downloads: projects-cache.db, selection.db, taginfo-chronology.db, taginfo-db.db, taginfo-history.db, taginfo-languages.db, taginfo-master.db, taginfo-projects.db, taginfo-wiki.db, taginfo-wikidata.db + - Example: `https://osm-seed.org.s3.amazonaws.com/taginfo` -- `URL_PLANET_FILE`: URL for the latest planet PBF file. -- `URL_HISTORY_PLANET_FILE`: URL for the latest full history planet PBF file. -- `TIME_UPDATE_INTERVAL` Interval time to update the databases, e.g: `50m` = every 50 minutes, `20h` = every 20 hours , `5d` = every 5 days +- `DOWNLOAD_DB`: Which databases to download (e.g., `languages wiki` or `languages wiki projects chronology`) -The following env vars are required in the instance to update the values at: https://github.com/taginfo/taginfo/blob/master/taginfo-config-example.json +- `CREATE_DB`: Which databases to create from PBF files (e.g., `db projects` or `db projects chronology`) + - `db` requires `URL_PLANET_FILE` or `URL_PLANET_FILE_STATE` + - `projects` requires `TAGINFO_PROJECT_REPO` + - `chronology` requires `URL_PLANET_FILE` or `URL_HISTORY_PLANET_FILE` -- `OVERWRITE_CONFIG_URL`: config file with the values to update +### Other +- `TAGINFO_PROJECT_REPO`: Repository URL for taginfo projects (default: https://github.com/taginfo/taginfo-projects.git) +- `OVERWRITE_CONFIG_URL`: URL to custom taginfo config JSON file +- `INTERVAL_DOWNLOAD_DATA`: Pause between database downloads from `TAGINFO_DB_BASE_URL`, passed to `sleep` (e.g., `3600` for 1 hour, `7d` for 7 days) -- `DOWNLOAD_DB`: Taginfo instances need 7 Sqlite databases to start up the web service, all of them can be downloaded from https://taginfo.openstreetmap.org/download. Or if you can download only some of them you can pass herec. e.g DOWNLOAD_DB=`languages wiki`, or DOWNLOAD_DB=`languages wiki projects chronology`. - -- `CREATE_DB`: If you want process you of data using the PBF files, you can pass the values. eg. CREATE_DB=`db projects` or CREATE_DB=`db projects chronology`. 
- Note: - - Value `db` require to pass `URL_PLANET_FILE` or `URL_PLANET_FILE_STATE` - - Value `projects` require to pass `TAGINFO_PROJECT_REPO` - - Value `chronology` require to pass `URL_PLANET_FILE` or `URL_HISTORY_PLANET_FILE` - -#### Running taginfo container +## Running ```sh - # Docker compose - docker-compose run taginfo - - # Docker - docker run \ - --env-file ./envs/.env.taginfo \ - -v ${PWD}/data/taginfo-data:/apps/data/ \ - --network osm-seed_default \ - -it osmseed-taginfo:v1 -``` \ No newline at end of file +# Docker compose +docker-compose run taginfo + +# Docker +docker run \ + --env-file ./envs/.env.taginfo \ + -v ${PWD}/data/taginfo-data:/usr/src/app/data \ + --network osm-seed_default \ + -it osmseed-taginfo:v1 +``` diff --git a/images/taginfo/start.sh b/images/taginfo/start.sh index 2c5d69c5..b56c52fe 100755 --- a/images/taginfo/start.sh +++ b/images/taginfo/start.sh @@ -49,8 +49,8 @@ process_data() { mv $DATADIR/*.db $DATADIR/ mv $DATADIR/*/*.db $DATADIR/ # if AWS_S3_BUCKET is set upload data - if ! aws s3 ls "s3://$AWS_S3_BUCKET/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then - aws s3 sync $DATADIR/ s3://$AWS_S3_BUCKET/$ENVIRONMENT/ --exclude "*" --include "*.db" + if ! aws s3 ls "s3://$AWS_S3_BUCKET/taginfo" 2>&1 | grep -q 'An error occurred'; then + aws s3 sync $DATADIR/ s3://$AWS_S3_BUCKET/taginfo/ --exclude "*" --include "*.db" fi } @@ -63,25 +63,60 @@ compress_files() { } download_db_files() { - if ! aws s3 ls "s3://$AWS_S3_BUCKET/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then - aws s3 sync "s3://$AWS_S3_BUCKET/$ENVIRONMENT/" "$DATADIR/" - mv $DATADIR/*.db $DATADIR/ - mv $DATADIR/*/*.db $DATADIR/ - compress_files + local base_url=$1 + + if [ -z "$base_url" ]; then + echo "Error: URL base is required for download_db_files" + return 1 fi + + # Ensure base_url ends with / + if [[ ! 
"$base_url" =~ /$ ]]; then + base_url="${base_url}/" + fi + + # List of SQLite database files to download + local db_files=( + "projects-cache.db" + "selection.db" + "taginfo-chronology.db" + "taginfo-db.db" + "taginfo-history.db" + "taginfo-languages.db" + "taginfo-master.db" + "taginfo-projects.db" + "taginfo-wiki.db" + "taginfo-wikidata.db" + ) + + echo "Downloading SQLite database files from: $base_url" + + for db_file in "${db_files[@]}"; do + local file_url="${base_url}${db_file}" + local output_path="${DATADIR}/${db_file}" + # wget -O truncates its target up front, so fetch to a temp file and rename only on success + echo "Downloading: $db_file" + if wget -q --show-progress -O "${output_path}.tmp" --no-check-certificate "$file_url" && mv -f "${output_path}.tmp" "$output_path"; then + echo "Successfully downloaded: $db_file" + else + echo "Warning: Failed to download $db_file from $file_url" + rm -f "${output_path}.tmp" # keep the previous copy and continue with the other files + fi + done + + echo "Database files download completed" } sync_latest_db_version() { while true; do + download_db_files "$TAGINFO_DB_BASE_URL" sleep "$INTERVAL_DOWNLOAD_DATA" - download_db_files done } start_web() { echo "Start...Taginfo web service" - download_db_files - cd $WORKDIR/taginfo/web && ./taginfo.rb & sync_latest_db_version + cd $WORKDIR/taginfo/web && ./taginfo.rb } ACTION=$1 @@ -89,6 +124,11 @@ ACTION=$1 [[ ! -z ${OVERWRITE_CONFIG_URL} ]] && wget $OVERWRITE_CONFIG_URL -O /usr/src/app/taginfo-config.json updates_source_code if [ "$ACTION" = "web" ]; then + # Start sync in background if enabled + if [ "${FETCH_DB_FILES:-true}" = "true" ] && [ ! 
-z "$TAGINFO_DB_BASE_URL" ]; then + sync_latest_db_version & + fi + # Start web server in foreground (so the loop can detect if it fails) start_web elif [ "$ACTION" = "data" ]; then process_data diff --git a/osm-seed/templates/taginfo/taginfo-configMap.yaml b/osm-seed/templates/taginfo/taginfo-configMap.yaml index c7b76bdc..9691609a 100644 --- a/osm-seed/templates/taginfo/taginfo-configMap.yaml +++ b/osm-seed/templates/taginfo/taginfo-configMap.yaml @@ -15,5 +15,6 @@ data: TIME_UPDATE_INTERVAL: {{ .Values.taginfo.env.TIME_UPDATE_INTERVAL | quote }} AWS_S3_BUCKET: {{ .Values.taginfo.env.AWS_S3_BUCKET | quote }} ENVIRONMENT: {{ .Values.taginfo.env.ENVIRONMENT | quote }} - INTERVAL_DOWNLOAD_DATA: {{ .Values.taginfo.env.INTERVAL_DOWNLOAD_DATA | quote}} + INTERVAL_DOWNLOAD_DATA: {{ .Values.taginfo.env.INTERVAL_DOWNLOAD_DATA | quote }} + TAGINFO_DB_BASE_URL: {{ .Values.taginfo.env.TAGINFO_DB_BASE_URL | quote }} {{- end }} diff --git a/osm-seed/templates/taginfo/taginfo-cronJob.yaml b/osm-seed/templates/taginfo/taginfo-cronJob.yaml index a693c8e8..95f069c9 100644 --- a/osm-seed/templates/taginfo/taginfo-cronJob.yaml +++ b/osm-seed/templates/taginfo/taginfo-cronJob.yaml @@ -19,7 +19,10 @@ spec: spec: template: spec: + {{- if .Values.taginfo.serviceAccount.enabled }} serviceAccountName: {{ .Values.taginfo.serviceAccount.name }} + automountServiceAccountToken: true + {{- end }} containers: - name: {{ .Release.Name }}-taginfo-job image: "{{ .Values.taginfo.image.name }}:{{ .Values.taginfo.image.tag }}" diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml index e51e32c8..72bc53cf 100644 --- a/osm-seed/values.yaml +++ b/osm-seed/values.yaml @@ -1030,6 +1030,7 @@ taginfo: ENVIRONMENT: development AWS_S3_BUCKET: taginfo INTERVAL_DOWNLOAD_DATA: 3600 + TAGINFO_DB_BASE_URL: https://planet.openhistoricalmap.org.s3.amazonaws.com/taginfo resources: enabled: false requests: