1- import ast
2- from io import StringIO
31import psycopg2
42import os
53import subprocess
64from time import time
7- from task_logger import log
5+ from task_logger import log
6+
87
98def restore_geocml_db_from_backups ():
109 try :
11- conn = psycopg2 .connect (dbname = "geocml_db" ,
12- user = "postgres" ,
13- password = os .environ ["GEOCML_POSTGRES_ADMIN_PASSWORD" ],
14- host = "geocml-postgres" ,
15- port = 5432 )
10+ conn = psycopg2 .connect (
11+ dbname = "geocml_db" ,
12+ user = "postgres" ,
13+ password = os .environ ["GEOCML_POSTGRES_ADMIN_PASSWORD" ],
14+ host = "geocml-postgres" ,
15+ port = 5432 ,
16+ )
1617 except psycopg2 .OperationalError :
17- log ("Couldn\ ' t connect to geocml_db; is the postgresql service started?" )
18+ log ("Couldn't connect to geocml_db; is the postgresql service started?" )
1819 return
1920
2021 db_backups_dir = os .path .join (os .sep , "Persistence" , "DBBackups" )
@@ -29,7 +30,11 @@ def restore_geocml_db_from_backups():
2930 most_recent_backup = subdir [0 ]
3031 except ValueError :
3132 if not subdir [0 ] == db_backups_dir :
32- log ("Found something unexpected in backup directory, skipping over: {}" .format (subdir [0 ]))
33+ log (
34+ "Found something unexpected in backup directory, skipping over: {}" .format (
35+ subdir [0 ]
36+ )
37+ )
3338
3439 if most_recent_backup == "" :
3540 log ("No recent backups found. Aborting restoration process." )
@@ -39,23 +44,59 @@ def restore_geocml_db_from_backups():
3944
4045 # Rebuild tables from .tabor file
4146
42- out = subprocess .run (["tabor" , "load" , "--file" , os .path .join (most_recent_backup , "geocml_db.tabor" ), "--db" , "geocml_db" , "--host" , "geocml-postgres" , "--username" , "postgres" , "--password" , os .environ ["GEOCML_POSTGRES_ADMIN_PASSWORD" ]], capture_output = True )
47+ out = subprocess .run (
48+ [
49+ "tabor" ,
50+ "load" ,
51+ "--file" ,
52+ os .path .join (most_recent_backup , "geocml_db.tabor" ),
53+ "--db" ,
54+ "geocml_db" ,
55+ "--host" ,
56+ "geocml-postgres" ,
57+ "--username" ,
58+ "postgres" ,
59+ "--password" ,
60+ os .environ ["GEOCML_POSTGRES_ADMIN_PASSWORD" ],
61+ ],
62+ capture_output = True ,
63+ )
4364 if out .stderr :
4465 log ("Failed to load tables from .tabor file" )
4566 return 0
4667
4768 cursor = conn .cursor ()
4869 cursor .execute ("SET session_replication_role = replica;" )
4970
50- for csv_data_file in os .listdir (most_recent_backup ): # load data from CSV backups
71+ for csv_data_file in os .listdir (most_recent_backup ): # load data from CSV backups
5172 file_name_split = csv_data_file .split (":" )
5273
5374 if file_name_split [0 ] == "data" :
5475 log ("Found CSV data file {}" .format (csv_data_file ))
55- file_name_split = file_name_split [1 ].split ("." )
56- data_file = open (os .path .join (db_backups_dir , most_recent_backup , csv_data_file ), "r" ).readlines ()
57- cursor .copy_from (StringIO ("" .join (data_file [1 ::])), f"{ file_name_split [1 ]} " , sep = "," ,
58- columns = tuple (data_file [0 ].replace ("\n " , "" ).split ("," )), null = "NULL" )
76+ file_name_split = file_name_split [1 ].split (".csv" )
77+ [schema , table ] = [
78+ file_name_split [0 ].split ("." )[0 ],
79+ file_name_split [0 ].split ("." )[1 ]
80+ ]
81+ csv_file_path = os .path .join (
82+ db_backups_dir , most_recent_backup , csv_data_file
83+ )
84+
85+ # Read header separately to get column names
86+ with open (csv_file_path , "r" ) as f :
87+ header = f .readline ().strip ()
88+ columns = tuple (header .replace ("\n " , "" ).split ("," ))
89+
90+ # Stream the data from the file
91+ with open (csv_file_path , "r" ) as f :
92+ # Skip the header line
93+ next (f )
94+ log (f"Loading data to: { schema } .{ table } " )
95+ cursor .execute (f'SET search_path TO { schema } ' )
96+ cursor .copy_from (
97+ f , table , sep = "," , columns = columns , null = "NULL"
98+ )
99+
59100 log ("Finished loading data!" )
60101
61102 conn .commit ()
0 commit comments