Skip to content

Commit 1f6ad69

Browse files
author
ci bot
committed
Merge branch 'db-data-type' into 'enterprise'
feat(data-type): track database data type for columns. See merge request dkinternal/testgen/dataops-testgen!327
2 parents b092836 + a38046d commit 1f6ad69

34 files changed

+154
-52
lines changed

testgen/commands/queries/profiling_query.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class CProfilingSQL:
2222
col_name = ""
2323
col_gen_type = ""
2424
col_type = ""
25+
db_data_type = ""
2526
col_ordinal_position = "0"
2627
col_is_decimal = ""
2728
col_top_freq_update = ""
@@ -99,6 +100,7 @@ def _get_params(self) -> dict:
99100
"COL_NAME_SANITIZED": self.col_name.replace("'", "''"),
100101
"COL_GEN_TYPE": self.col_gen_type,
101102
"COL_TYPE": self.col_type or "",
103+
"DB_DATA_TYPE": self.db_data_type or "",
102104
"COL_POS": self.col_ordinal_position,
103105
"TOP_FREQ": self.col_top_freq_update,
104106
"PROFILE_RUN_ID": self.profile_run_id,

testgen/commands/run_profiling_bridge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,8 @@ def run_profiling_queries(table_group_id: str, username: str | None = None, spin
317317
clsProfiling.data_schema = dctColumnRecord["table_schema"]
318318
clsProfiling.data_table = dctColumnRecord["table_name"]
319319
clsProfiling.col_name = dctColumnRecord["column_name"]
320-
clsProfiling.col_type = dctColumnRecord["data_type"]
320+
clsProfiling.col_type = dctColumnRecord["column_type"]
321+
clsProfiling.db_data_type = dctColumnRecord["db_data_type"]
321322
clsProfiling.profile_run_id = profiling_run_id
322323
clsProfiling.col_is_decimal = dctColumnRecord["is_decimal"]
323324
clsProfiling.col_ordinal_position = dctColumnRecord["ordinal_position"]

testgen/commands/run_refresh_data_chars.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def run_refresh_data_chars_queries(params: TestExecutionParams, run_date: str, s
5050
"position",
5151
"general_type",
5252
"column_type",
53+
"db_data_type",
5354
"record_ct",
5455
]
5556
staging_records = [
@@ -62,7 +63,8 @@ def run_refresh_data_chars_queries(params: TestExecutionParams, run_date: str, s
6263
item["column_name"],
6364
item["ordinal_position"],
6465
item["general_type"],
65-
item["data_type"],
66+
item["column_type"],
67+
item["db_data_type"],
6668
count_map.get(f"{item['table_schema']}.{item['table_name']}", 0),
6769
]
6870
for item in ddf_results

testgen/template/data_chars/data_chars_update.sql

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ WITH new_chars AS (
120120
position,
121121
general_type,
122122
column_type,
123+
db_data_type,
123124
functional_data_type,
124125
run_date
125126
FROM {SOURCE_TABLE}
@@ -130,8 +131,9 @@ update_chars AS (
130131
SET ordinal_position = n.position,
131132
general_type = n.general_type,
132133
column_type = n.column_type,
134+
db_data_type = n.db_data_type,
133135
functional_data_type = COALESCE(n.functional_data_type, d.functional_data_type),
134-
last_mod_date = CASE WHEN n.column_type <> d.column_type THEN n.run_date ELSE d.last_mod_date END,
136+
last_mod_date = CASE WHEN n.db_data_type <> d.db_data_type THEN n.run_date ELSE d.last_mod_date END,
135137
drop_date = NULL
136138
FROM new_chars n
137139
INNER JOIN data_column_chars d ON (
@@ -142,22 +144,22 @@ update_chars AS (
142144
)
143145
WHERE data_column_chars.table_id = d.table_id
144146
AND data_column_chars.column_name = d.column_name
145-
RETURNING data_column_chars.*, d.column_type as old_column_type
147+
RETURNING data_column_chars.*, d.db_data_type as old_data_type
146148
)
147149
INSERT INTO data_structure_log (
148150
element_id,
149151
change_date,
150152
change,
151-
old_column_type,
152-
new_column_type
153+
old_data_type,
154+
new_data_type
153155
)
154156
SELECT u.column_id,
155157
u.last_mod_date,
156158
'M',
157-
u.old_column_type,
158-
u.column_type
159+
u.old_data_type,
160+
u.db_data_type
159161
FROM update_chars u
160-
WHERE u.old_column_type <> u.column_type;
162+
WHERE u.old_data_type <> u.db_data_type;
161163

162164

163165
-- Add new records
@@ -169,6 +171,7 @@ WITH new_chars AS (
169171
position,
170172
general_type,
171173
column_type,
174+
db_data_type,
172175
functional_data_type,
173176
run_date
174177
FROM {SOURCE_TABLE}
@@ -184,6 +187,7 @@ inserted_records AS (
184187
ordinal_position,
185188
general_type,
186189
column_type,
190+
db_data_type,
187191
functional_data_type,
188192
add_date,
189193
last_mod_date
@@ -196,6 +200,7 @@ inserted_records AS (
196200
n.position,
197201
n.general_type,
198202
n.column_type,
203+
n.db_data_type,
199204
n.functional_data_type,
200205
n.run_date,
201206
n.run_date
@@ -218,12 +223,12 @@ INSERT INTO data_structure_log (
218223
element_id,
219224
change_date,
220225
change,
221-
new_column_type
226+
new_data_type
222227
)
223228
SELECT i.column_id,
224229
i.add_date,
225230
'A',
226-
i.column_type
231+
i.db_data_type
227232
FROM inserted_records i;
228233

229234
-- Mark dropped records
@@ -263,10 +268,10 @@ INSERT INTO data_structure_log (
263268
element_id,
264269
change_date,
265270
change,
266-
old_column_type
271+
old_data_type
267272
)
268273
SELECT del.column_id,
269274
del.drop_date,
270275
'D',
271-
del.column_type
276+
del.db_data_type
272277
FROM deleted_records del;

testgen/template/dbsetup/030_initialize_new_schema_structure.sql

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ CREATE TABLE stg_data_chars_updates (
4040
position INTEGER,
4141
general_type VARCHAR(1),
4242
column_type VARCHAR(50),
43+
db_data_type VARCHAR(50),
4344
functional_data_type VARCHAR(50),
4445
record_ct BIGINT
4546
);
@@ -247,6 +248,7 @@ CREATE TABLE profile_results (
247248
position INTEGER,
248249
column_name VARCHAR(120),
249250
column_type VARCHAR(50),
251+
db_data_type VARCHAR(50),
250252
general_type VARCHAR(1),
251253
record_ct BIGINT,
252254
value_ct BIGINT,
@@ -340,6 +342,7 @@ CREATE TABLE profile_anomaly_results (
340342
table_name VARCHAR(120),
341343
column_name VARCHAR(500),
342344
column_type VARCHAR(50),
345+
db_data_type VARCHAR(50),
343346
anomaly_id VARCHAR(10),
344347
detail VARCHAR,
345348
disposition VARCHAR(20), -- Confirmed, Dismissed, Inactive
@@ -369,11 +372,11 @@ CREATE TABLE data_structure_log (
369372
log_id UUID DEFAULT gen_random_uuid()
370373
CONSTRAINT pk_dsl_id
371374
PRIMARY KEY,
372-
element_id UUID,
373-
change_date TIMESTAMP,
374-
change VARCHAR(10),
375-
old_column_type VARCHAR(50),
376-
new_column_type VARCHAR(50)
375+
element_id UUID,
376+
change_date TIMESTAMP,
377+
change VARCHAR(10),
378+
old_data_type VARCHAR(50),
379+
new_data_type VARCHAR(50)
377380
);
378381

379382
CREATE TABLE data_table_chars (
@@ -418,6 +421,7 @@ CREATE TABLE data_column_chars (
418421
ordinal_position INTEGER,
419422
general_type VARCHAR(1),
420423
column_type VARCHAR(50),
424+
db_data_type VARCHAR(50),
421425
functional_data_type VARCHAR(50),
422426
description VARCHAR(1000),
423427
critical_data_element BOOLEAN,
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
SET SEARCH_PATH TO {SCHEMA_NAME};
2+
3+
ALTER TABLE data_structure_log RENAME COLUMN old_column_type TO old_data_type;
4+
ALTER TABLE data_structure_log RENAME COLUMN new_column_type TO new_data_type;
5+
6+
ALTER TABLE stg_data_chars_updates ADD COLUMN db_data_type VARCHAR(50);
7+
ALTER TABLE profile_results ADD COLUMN db_data_type VARCHAR(50);
8+
ALTER TABLE profile_anomaly_results ADD COLUMN db_data_type VARCHAR(50);
9+
ALTER TABLE data_column_chars ADD COLUMN db_data_type VARCHAR(50);
10+
11+
UPDATE profile_results
12+
SET db_data_type = column_type
13+
WHERE db_data_type IS NULL;
14+
15+
UPDATE profile_anomaly_results
16+
SET db_data_type = column_type
17+
WHERE db_data_type IS NULL;
18+
19+
UPDATE data_column_chars
20+
SET db_data_type = column_type
21+
WHERE db_data_type IS NULL;

testgen/template/flavors/bigquery/data_chars/schema_ddf_query_bigquery.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ SELECT '{PROJECT_CODE}' AS project_code,
88
WHEN LOWER(c.data_type) = 'date' THEN 'date'
99
WHEN LOWER(c.data_type) = 'bool' THEN 'boolean'
1010
ELSE LOWER(c.data_type)
11-
END AS data_type,
11+
END AS column_type,
12+
c.data_type AS db_data_type,
1213
NULL AS character_maximum_length,
1314
c.ordinal_position,
1415
CASE

testgen/template/flavors/bigquery/profiling/project_profiling_query_bigquery.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ strTemplate01_5: |
1919
{COL_POS} AS position,
2020
'{COL_NAME_SANITIZED}' AS column_name,
2121
'{COL_TYPE}' AS column_type,
22+
'{DB_DATA_TYPE}' AS db_data_type,
2223
'{COL_GEN_TYPE}' AS general_type,
2324
COUNT(*) AS record_ct,
2425
strTemplate02_X: |

testgen/template/flavors/databricks/data_chars/schema_ddf_query_databricks.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ SELECT '{PROJECT_CODE}' AS project_code,
99
WHEN lower(c.full_data_type) IN ('double', 'float') THEN 'numeric'
1010
WHEN lower(c.full_data_type) LIKE 'decimal%' THEN 'numeric(' || c.numeric_precision || ',' || c.numeric_scale || ')'
1111
ELSE lower(c.full_data_type)
12-
END AS data_type,
12+
END AS column_type,
13+
c.full_data_type AS db_data_type,
1314
c.character_maximum_length,
1415
c.ordinal_position,
1516
CASE

testgen/template/flavors/databricks/profiling/project_profiling_query_databricks.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ strTemplate01_5: |
1111
{COL_POS} AS position,
1212
'{COL_NAME_SANITIZED}' AS column_name,
1313
'{COL_TYPE}' AS column_type,
14+
'{DB_DATA_TYPE}' AS db_data_type,
1415
'{COL_GEN_TYPE}' AS general_type,
1516
COUNT(*) AS record_ct,
1617
strTemplate02_X: |

0 commit comments

Comments
 (0)