Skip to content

Commit 0498f4b

Browse files
Merge pull request #398 from rustprooflabs/fix-397-update-append-errors
Fix handling of nested polygons with `--update` mode
2 parents f72fe10 + d2cc174 commit 0498f4b

File tree

11 files changed

+317
-276
lines changed

11 files changed

+317
-276
lines changed

db/deploy/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# PgOSM Flex SQL deploy scripts
2+
3+
The scripts in this folder are executed during PgOSM Flex initialization via
4+
the `prepare_osm_schema()` function in `docker/db.py`.
5+
New or removed files in this folder must be adjusted in that function
6+
as appropriate.

db/deploy/replication_functions.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
/*
2+
Creates functions used for maintaining data when --replication is used.
13
4+
These functions are also used when using `--update append` mode of
5+
PgOSM Flex.
6+
*/
27
BEGIN;
38

49

docker/db.py

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,10 @@ def log_pg_details():
210210

211211

212212
def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
213-
"""Runs through series of steps to prepare database for PgOSM.
213+
"""Runs through steps to prepare the target database for PgOSM Flex.
214+
215+
Includes additional preparation for using --replication and --updated=append
216+
modes.
214217
215218
Parameters
216219
--------------------------
@@ -245,6 +248,9 @@ def prepare_pgosm_db(skip_qgis_style, db_path, import_mode, schema_name):
245248
schema_name=schema_name)
246249
run_insert_pgosm_road(db_path=db_path, schema_name=schema_name)
247250

251+
if import_mode.replication_update or import_mode.update == 'append':
252+
osm2pgsql_replication_start()
253+
248254

249255
def start_import(pgosm_region, pgosm_date, srid, language, layerset, git_info,
250256
osm2pgsql_version, import_mode, schema_name, input_file):
@@ -477,7 +483,7 @@ def get_db_conn(conn_string):
477483
return conn
478484

479485

480-
def pgosm_after_import(flex_path):
486+
def pgosm_after_import(flex_path: str) -> bool:
481487
"""Runs post-processing SQL via Lua script.
482488
483489
Layerset logic is established via environment variable, must happen
@@ -508,17 +514,38 @@ def pgosm_after_import(flex_path):
508514

509515

510516
def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
511-
"""Runs stored procedure to calculate nested admin polygons via psql.
517+
"""Runs two stored procedures to calculate nested admin polygons via psql.
512518
513519
Parameters
514520
----------------------
515521
flex_path : str
516522
schema_name : str
517523
"""
518-
sql_raw = f'CALL {schema_name}.build_nested_admin_polygons();'
524+
# Populate the table
525+
sql_raw_1 = f'CALL {schema_name}.populate_place_polygon_nested();'
519526

520527
conn_string = os.environ['PGOSM_CONN']
521-
cmds = ['psql', '-d', conn_string, '-c', sql_raw]
528+
cmds = ['psql', '-d', conn_string, '-c', sql_raw_1]
529+
LOGGER.info('Populating place_polygon_nested table (osm.populate_place_polygon_nested() )')
530+
output = subprocess.run(cmds,
531+
text=True,
532+
cwd=flex_path,
533+
check=False,
534+
stdout=subprocess.PIPE,
535+
stderr=subprocess.STDOUT)
536+
LOGGER.info(f'Nested polygon output: \n {output.stdout}')
537+
538+
if output.returncode != 0:
539+
err_msg = f'Failed to populate nested polygon data. Return code: {output.returncode}'
540+
LOGGER.error(err_msg)
541+
sys.exit(f'{err_msg} - Check the log output for details.')
542+
543+
544+
# Build the data
545+
sql_raw_2 = f' CALL {schema_name}.build_nested_admin_polygons();'
546+
547+
conn_string = os.environ['PGOSM_CONN']
548+
cmds = ['psql', '-d', conn_string, '-c', sql_raw_2]
522549
LOGGER.info('Building nested polygons... (this can take a while)')
523550
output = subprocess.run(cmds,
524551
text=True,
@@ -537,18 +564,23 @@ def pgosm_nested_admin_polygons(flex_path: str, schema_name: str):
537564

538565
def osm2pgsql_replication_start():
539566
"""Runs pre-replication step to clean out FKs that would prevent updates.
567+
568+
This function is necessary for using `--replication (osm2pgsql-replication)
569+
and `--update append` mode.
540570
"""
541571
LOGGER.info('Prep database to allow data updates.')
542-
# This use of append applies to both osm2pgsql --append and osm2pgsq-replication, not renaming from "append"
543572
sql_raw = 'CALL osm.append_data_start();'
544573

545574
with get_db_conn(conn_string=connection_string()) as conn:
546575
cur = conn.cursor()
547576
cur.execute(sql_raw)
548577

549578

550-
def osm2pgsql_replication_finish(skip_nested):
551-
"""Runs post-replication step to put FKs back and refresh materialied views.
579+
def osm2pgsql_replication_finish(skip_nested: bool):
580+
"""Runs post-replication step to refresh materialized views and rebuild
581+
nested data when appropriate.
582+
583+
Only needed for `--replication`, not used for `--update append` mode.
552584
553585
Parameters
554586
---------------------

docker/helpers.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,9 @@ def verify_checksum(md5_file: str, path: str):
109109
logger.debug('md5sum validated')
110110

111111

112-
def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
113-
layerset_path, replication, schema_name):
112+
def set_env_vars(region: str, subregion: str, srid: str, language: str,
113+
pgosm_date: str, layerset: str,
114+
layerset_path: str, schema_name: str, skip_nested: bool):
114115
"""Sets environment variables needed by PgOSM Flex. Also creates DB
115116
record in `osm.pgosm_flex` table.
116117
@@ -122,11 +123,11 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
122123
language : str
123124
pgosm_date : str
124125
layerset : str
126+
Name of layerset matching the INI filename.
125127
layerset_path : str
126128
str when set, or None
127-
replication : bool
128-
Indicates when osm2pgsql-replication is used
129129
schema_name : str
130+
skip_nested : bool
130131
"""
131132
logger = logging.getLogger('pgosm-flex')
132133
logger.debug('Ensuring env vars are not set from prior run')
@@ -159,6 +160,7 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
159160
pgosm_region = get_region_combined(region, subregion)
160161
logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')
161162

163+
os.environ['SKIP_NESTED'] = str(skip_nested)
162164

163165

164166
def get_region_combined(region: str, subregion: str) -> str:
@@ -225,7 +227,7 @@ def get_git_info(tag_only: bool=False) -> str:
225227

226228

227229
def unset_env_vars():
228-
"""Unsets environment variables used by PgOSM Flex.
230+
"""Unset environment variables used by PgOSM Flex.
229231
230232
Does not pop POSTGRES_DB on purpose to allow non-Docker operation.
231233
"""
@@ -239,6 +241,7 @@ def unset_env_vars():
239241
os.environ.pop('PGOSM_CONN', None)
240242
os.environ.pop('PGOSM_CONN_PG', None)
241243
os.environ.pop('SCHEMA_NAME', None)
244+
os.environ.pop('SKIP_NESTED', None)
242245

243246

244247
class ImportMode():
@@ -310,17 +313,17 @@ def okay_to_run(self, prior_import: dict) -> bool:
310313
"""
311314
self.logger.debug(f'Checking if it is okay to run...')
312315
if self.force:
313-
self.logger.warning(f'Using --force, kiss existing data goodbye')
316+
self.logger.warning('Using --force, kiss existing data goodbye.')
314317
return True
315318

316319
# If no prior imports, do not require force
317320
if len(prior_import) == 0:
318-
self.logger.debug(f'No prior import found, okay to proceed.')
321+
self.logger.debug('No prior import found, okay to proceed.')
319322
return True
320323

321324
prior_replication = prior_import['replication']
322325

323-
# Check git version against latest.
326+
# Check PgOSM version using Git tags
324327
# If current version is lower than prior version from latest import, stop.
325328
prior_import_version = prior_import['pgosm_flex_version_no_hash']
326329
git_tag = get_git_info(tag_only=True)
@@ -345,6 +348,9 @@ def okay_to_run(self, prior_import: dict) -> bool:
345348
self.logger.debug('Okay to proceed with replication')
346349
return True
347350

351+
if self.update == 'append':
352+
return True
353+
348354
msg = 'Prior data exists in the osm schema and --force was not used.'
349355
self.logger.error(msg)
350356
return False

docker/pgosm_flex.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ def run_pgosm_flex(ram, region, subregion, debug, force,
9595
region = input_file
9696

9797
helpers.set_env_vars(region, subregion, srid, language, pgosm_date,
98-
layerset, layerset_path, replication, schema_name)
98+
layerset, layerset_path, schema_name,
99+
skip_nested)
99100
db.wait_for_postgres()
100101
if force and db.pg_conn_parts()['pg_host'] == 'localhost':
101102
msg = 'Using --force with the built-in database is unnecessary.'
@@ -267,7 +268,6 @@ def run_replication_update(skip_nested, flex_path):
267268
"""
268269
logger = logging.getLogger('pgosm-flex')
269270
conn_string = db.connection_string()
270-
db.osm2pgsql_replication_start()
271271

272272
update_cmd = """
273273
osm2pgsql-replication update -d $PGOSM_CONN \
@@ -531,10 +531,13 @@ def run_post_processing(flex_path, skip_nested, import_mode, schema_name):
531531
logger = logging.getLogger('pgosm-flex')
532532

533533
if not import_mode.run_post_sql:
534-
logger.info('Running with --update append: Skipping post-processing SQL')
534+
msg = 'Running with --update append: Skipping post-processing SQL.'
535+
msg += ' Running osm2pgsql_replication_finish() instead.'
536+
logger.info(msg)
537+
db.osm2pgsql_replication_finish(skip_nested=skip_nested)
535538
return True
536539

537-
post_processing_sql = db.pgosm_after_import(flex_path)
540+
post_processing_sql = db.pgosm_after_import(flex_path=flex_path)
538541

539542
if skip_nested:
540543
logger.info('Skipping calculating nested polygons')

docker/tests/test_geofabrik.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def setUp(self):
2222
pgosm_date=PGOSM_DATE,
2323
layerset=LAYERSET,
2424
layerset_path=None,
25-
replication=False,
26-
schema_name='osm')
25+
schema_name='osm',
26+
skip_nested=True)
2727

2828

2929
def tearDown(self):
@@ -44,8 +44,8 @@ def test_get_region_filename_returns_region_when_subregion_None(self):
4444
pgosm_date=PGOSM_DATE,
4545
layerset=LAYERSET,
4646
layerset_path=None,
47-
replication=False,
48-
schema_name='osm')
47+
schema_name='osm',
48+
skip_nested=True)
4949

5050
result = geofabrik.get_region_filename()
5151
expected = f'{REGION_US}-latest.osm.pbf'

docker/tests/test_pgosm_flex.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ def setUp(self):
1919
pgosm_date=PGOSM_DATE,
2020
layerset=LAYERSET,
2121
layerset_path=None,
22-
replication=False,
23-
schema_name='osm')
22+
schema_name='osm',
23+
skip_nested=True)
2424

2525

2626
def tearDown(self):
@@ -91,8 +91,8 @@ def test_get_export_filename_region_only(self):
9191
pgosm_date=PGOSM_DATE,
9292
layerset=LAYERSET,
9393
layerset_path=None,
94-
replication=False,
95-
schema_name='osm')
94+
schema_name='osm',
95+
skip_nested=True)
9696

9797
input_file = None
9898
result = pgosm_flex.get_export_filename(input_file)
@@ -109,8 +109,8 @@ def test_layerset_include_place_returns_boolean(self):
109109
pgosm_date=PGOSM_DATE,
110110
layerset=LAYERSET,
111111
layerset_path=layerset_path,
112-
replication=False,
113-
schema_name='osm')
112+
schema_name='osm',
113+
skip_nested=True)
114114

115115
paths = pgosm_flex.get_paths()
116116
result = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
@@ -128,8 +128,8 @@ def test_layerset_include_place_returns_True_with_default_layerset(self):
128128
pgosm_date=PGOSM_DATE,
129129
layerset=LAYERSET,
130130
layerset_path=layerset_path,
131-
replication=False,
132-
schema_name='osm')
131+
schema_name='osm',
132+
skip_nested=True)
133133

134134
paths = pgosm_flex.get_paths()
135135
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
@@ -147,8 +147,8 @@ def test_layerset_include_place_returns_false_when_place_false_in_ini(self):
147147
pgosm_date=PGOSM_DATE,
148148
layerset=layerset,
149149
layerset_path=layerset_path,
150-
replication=False,
151-
schema_name='osm')
150+
schema_name='osm',
151+
skip_nested=True)
152152

153153
paths = pgosm_flex.get_paths()
154154
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
@@ -166,8 +166,8 @@ def test_layerset_include_place_returns_false_when_place_missing_in_ini(self):
166166
pgosm_date=PGOSM_DATE,
167167
layerset=layerset,
168168
layerset_path=layerset_path,
169-
replication=False,
170-
schema_name='osm')
169+
schema_name='osm',
170+
skip_nested=True)
171171

172172
paths = pgosm_flex.get_paths()
173173
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])
@@ -185,8 +185,8 @@ def test_layerset_include_place_returns_true_when_place_true_in_ini(self):
185185
pgosm_date=PGOSM_DATE,
186186
layerset=layerset,
187187
layerset_path=layerset_path,
188-
replication=False,
189-
schema_name='osm')
188+
schema_name='osm',
189+
skip_nested=True)
190190

191191
paths = pgosm_flex.get_paths()
192192
actual = pgosm_flex.layerset_include_place(flex_path=paths['flex_path'])

0 commit comments

Comments
 (0)