Skip to content

Commit 68c3778

Browse files
committed
WIP
1 parent 35870e7 commit 68c3778

File tree

2 files changed, with 97 additions and 34 deletions.

build-resources/geocml-task-scheduler/geocml-task-scheduler/tasks/backup_geocml_db.py

Lines changed: 46 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5,31 +5,52 @@
55
from time import time
66
from task_logger import log
77

8-
ignore_tables = ("spatial_ref_sys", "geometry_columns", "geography_columns") # TODO: https://github.com/geoCML/tabor/issues/7
8+
ignore_tables = (
9+
"spatial_ref_sys",
10+
"geometry_columns",
11+
"geography_columns",
12+
) # TODO: https://github.com/geoCML/tabor/issues/7
913
ignore_schemas = ("pg_catalog", "information_schema")
1014

15+
1116
def backup_geocml_db():
1217
try:
13-
conn = psycopg2.connect(dbname="geocml_db",
14-
user="postgres",
15-
password=os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"],
16-
host="geocml-postgres",
17-
port=5432)
18+
conn = psycopg2.connect(
19+
dbname="geocml_db",
20+
user="postgres",
21+
password=os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"],
22+
host="geocml-postgres",
23+
port=5432,
24+
)
1825
except psycopg2.OperationalError:
19-
log("Couldn\'t connect to geocml_db; is the postgresql service started?")
26+
log("Couldn't connect to geocml_db; is the postgresql service started?")
2027
return
2128

2229
back_up_timestamp = time()
23-
path_to_backup_dir = os.path.join(os.sep, "Persistence", "DBBackups", str(back_up_timestamp))
30+
path_to_backup_dir = os.path.join(
31+
os.sep, "Persistence", "DBBackups", str(back_up_timestamp)
32+
)
2433
os.mkdir(path_to_backup_dir)
2534
delete_backup_dir = True
2635

2736
# Write table schemata to .tabor file
28-
out = subprocess.run(["tabor", "write", "--db", "geocml_db",
29-
"--username", "geocml", "--password", os.environ["GEOCML_POSTGRES_PASSWORD"],
30-
"--host", "geocml-postgres",
31-
"--file", os.path.join(path_to_backup_dir, "geocml_db.tabor")],
32-
capture_output=True)
37+
out = subprocess.run(
38+
[
39+
"tabor",
40+
"write",
41+
"--db",
42+
"geocml_db",
43+
"--username",
44+
"geocml",
45+
"--password",
46+
os.environ["GEOCML_POSTGRES_PASSWORD"],
47+
"--host",
48+
"geocml-postgres",
49+
"--file",
50+
os.path.join(path_to_backup_dir, "geocml_db.tabor"),
51+
],
52+
capture_output=True,
53+
)
3354

3455
if out.stderr:
3556
log("Failed to generate .tabor file {}".format(out.stderr))
@@ -45,7 +66,9 @@ def backup_geocml_db():
4566
if schema[0] in ignore_schemas:
4667
continue
4768

48-
cursor.execute(f"""SELECT * FROM information_schema.tables WHERE table_schema = '{schema[0]}';""")
69+
cursor.execute(
70+
f"""SELECT * FROM information_schema.tables WHERE table_schema = '{schema[0]}';"""
71+
)
4972

5073
tables = cursor.fetchall()
5174

@@ -55,12 +78,16 @@ def backup_geocml_db():
5578

5679
delete_backup_dir = False
5780

58-
data_file_path = os.path.join(path_to_backup_dir, "data:{}.{}.csv".format(schema[0], table[2]))
59-
data_file = open(data_file_path, "w")
60-
cursor.copy_expert(f"""COPY {schema[0]}."{table[2]}" TO STDOUT WITH (FORMAT csv, DELIMITER ',', HEADER, NULL 'NULL');""", data_file)
61-
data_file.close()
81+
data_file_path = os.path.join(
82+
path_to_backup_dir, "data:{}.{}.csv".format(schema[0], table[2])
83+
)
84+
with open(data_file_path, "w") as data_file:
85+
cursor.copy_expert(
86+
f"""COPY {schema[0]}."{table[2]}" TO STDOUT WITH (FORMAT csv, DELIMITER ',', HEADER, NULL 'NULL');""",
87+
data_file,
88+
)
6289

63-
if delete_backup_dir: # nothing to back up
90+
if delete_backup_dir: # nothing to back up
6491
log("Nothing to backup")
6592
os.rmdir(path_to_backup_dir)
6693

build-resources/geocml-task-scheduler/geocml-task-scheduler/tasks/restore_geocml_db_from_backups.py

Lines changed: 51 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,21 @@
1-
import ast
2-
from io import StringIO
31
import psycopg2
42
import os
53
import subprocess
64
from time import time
7-
from task_logger import log
5+
from task_logger import log
6+
87

98
def restore_geocml_db_from_backups():
109
try:
11-
conn = psycopg2.connect(dbname="geocml_db",
12-
user="postgres",
13-
password=os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"],
14-
host="geocml-postgres",
15-
port=5432)
10+
conn = psycopg2.connect(
11+
dbname="geocml_db",
12+
user="postgres",
13+
password=os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"],
14+
host="geocml-postgres",
15+
port=5432,
16+
)
1617
except psycopg2.OperationalError:
17-
log("Couldn\'t connect to geocml_db; is the postgresql service started?")
18+
log("Couldn't connect to geocml_db; is the postgresql service started?")
1819
return
1920

2021
db_backups_dir = os.path.join(os.sep, "Persistence", "DBBackups")
@@ -29,7 +30,11 @@ def restore_geocml_db_from_backups():
2930
most_recent_backup = subdir[0]
3031
except ValueError:
3132
if not subdir[0] == db_backups_dir:
32-
log("Found something unexpected in backup directory, skipping over: {}".format(subdir[0]))
33+
log(
34+
"Found something unexpected in backup directory, skipping over: {}".format(
35+
subdir[0]
36+
)
37+
)
3338

3439
if most_recent_backup == "":
3540
log("No recent backups found. Aborting restoration process.")
@@ -39,23 +44,54 @@ def restore_geocml_db_from_backups():
3944

4045
# Rebuild tables from .tabor file
4146

42-
out = subprocess.run(["tabor", "load", "--file", os.path.join(most_recent_backup, "geocml_db.tabor"), "--db", "geocml_db", "--host", "geocml-postgres", "--username", "postgres", "--password", os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"]], capture_output=True)
47+
out = subprocess.run(
48+
[
49+
"tabor",
50+
"load",
51+
"--file",
52+
os.path.join(most_recent_backup, "geocml_db.tabor"),
53+
"--db",
54+
"geocml_db",
55+
"--host",
56+
"geocml-postgres",
57+
"--username",
58+
"postgres",
59+
"--password",
60+
os.environ["GEOCML_POSTGRES_ADMIN_PASSWORD"],
61+
],
62+
capture_output=True,
63+
)
4364
if out.stderr:
4465
log("Failed to load tables from .tabor file")
4566
return 0
4667

4768
cursor = conn.cursor()
4869
cursor.execute("SET session_replication_role = replica;")
4970

50-
for csv_data_file in os.listdir(most_recent_backup): # load data from CSV backups
71+
for csv_data_file in os.listdir(most_recent_backup): # load data from CSV backups
5172
file_name_split = csv_data_file.split(":")
5273

5374
if file_name_split[0] == "data":
5475
log("Found CSV data file {}".format(csv_data_file))
5576
file_name_split = file_name_split[1].split(".")
56-
data_file = open(os.path.join(db_backups_dir, most_recent_backup, csv_data_file), "r").readlines()
57-
cursor.copy_from(StringIO("".join(data_file[1::])), f"{file_name_split[1]}", sep=",",
58-
columns=tuple(data_file[0].replace("\n", "").split(",")), null="NULL")
77+
csv_file_path = os.path.join(
78+
db_backups_dir, most_recent_backup, csv_data_file
79+
)
80+
81+
# Read header separately to get column names
82+
with open(csv_file_path, "r") as f:
83+
header = f.readline().strip()
84+
columns = tuple(header.replace("\n", "").split(","))
85+
86+
# Stream the data from the file
87+
with open(csv_file_path, "r") as f:
88+
# Skip the header line
89+
next(f)
90+
log("Loading data to: {}".format(file_name_split[1]))
91+
cursor.copy_from(
92+
f, f"{file_name_split[1]}", sep=",", columns=columns, null="NULL"
93+
)
94+
5995
log("Finished loading data!")
6096

6197
conn.commit()

0 commit comments

Comments
 (0)