Skip to content

Commit 46af2ff

Browse files
authored
Merge pull request #277 from dmaresma/fix/issue_276_and_add_pattern
fix 276 and Snowflake External Table improvement with Pattern keyword
2 parents 363d27a + 80dc236 commit 46af2ff

File tree

8 files changed

+539
-54644
lines changed

8 files changed

+539
-54644
lines changed

CHANGELOG.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
**v1.5.3**
2-
### Fixes
2+
### Improvements
3+
#### Snowflake :
4+
1. In Snowflake, added support for the `pattern` keyword in external table statements and improved rendering of the `location` value
5+
2.
36

4-
1. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
7+
### Fixes
58

9+
1. In Snowflake, fixed an unexpected error raised when the STRIP_OUTER_ARRAY property is used in a file_format statement - https://github.com/xnuinside/simple-ddl-parser/issues/276
10+
2.
611

712
**v1.5.2**
813
### Improvements
914
#### MySQL
1015
1. Added support for COLLATE - https://github.com/xnuinside/simple-ddl-parser/pull/266/files
1116

17+
### Fixes
18+
19+
1. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
1220

1321
**v1.5.1**
1422
### Improvements

simple_ddl_parser/ddl_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
8585
t_tag = self.parse_tags_symbols(t)
8686
if t_tag:
8787
return t_tag
88-
if "ARRAY" in t.value:
88+
if t.value.startswith("ARRAY"):
8989
t.type = "ARRAY"
9090
return t
9191
elif self.lexer.is_like:

simple_ddl_parser/dialects/hql.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ class HQL:
77
def p_expression_location(self, p: List) -> None:
88
"""expr : expr LOCATION STRING
99
| expr LOCATION DQ_STRING
10-
| expr LOCATION table_property_equals"""
10+
| expr LOCATION multi_id_or_string
11+
"""
1112
p[0] = p[1]
1213
p_list = list(p)
1314
p[0]["location"] = p_list[-1]

simple_ddl_parser/dialects/snowflake.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ def p_multi_id_or_string(self, p: List) -> None:
3434
p[0] = p[1]
3535
p[0].append(p_list[-1])
3636
else:
37-
value = " ".join(p_list[1:])
38-
p[0] = value
37+
totrim = " ".join(p_list[1:])
38+
p[0] = totrim.replace(' = ', '=').replace('= ', '')
3939

4040
def p_fmt_equals(self, p: List) -> None:
4141
"""fmt_equals : id LP multi_id_or_string RP
@@ -210,6 +210,12 @@ def p_expression_auto_refresh(self, p: List) -> None:
210210
p_list = remove_par(list(p))
211211
p[0]["auto_refresh"] = p_list[-1]
212212

213+
def p_expression_pattern(self, p: List) -> None:
214+
"""expr : expr PATTERN table_property_equals"""
215+
p[0] = p[1]
216+
p_list = remove_par(list(p))
217+
p[0]["pattern"] = p_list[-1]
218+
213219
def p_as_virtual(self, p: List):
214220
"""as_virtual : AS LP id LP id LP pid RP COMMA pid RP RP
215221
| AS LP id LP pid RP RP

simple_ddl_parser/exception.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,5 @@
44

55

66
class SimpleDDLParserException(Exception):
7-
"""Base exception in simple ddl parser library"""
8-
7+
""" Base exception in simple ddl parser library """
98
pass

simple_ddl_parser/parsetab.py

Lines changed: 489 additions & 54630 deletions
Large diffs are not rendered by default.

simple_ddl_parser/tokens.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
"DATA_RETENTION_TIME_IN_DAYS",
125125
"MAX_DATA_EXTENSION_TIME_IN_DAYS",
126126
"CHANGE_TRACKING",
127+
"PATTERN",
127128
"AUTO_REFRESH",
128129
"FILE_FORMAT",
129130
"TABLE_FORMAT",

tests/dialects/test_snowflake.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -827,16 +827,19 @@ def test_order_sequence():
827827

828828
def test_virtual_column_ext_table():
829829
ddl = """
830-
create or replace external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
830+
create external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
831831
"type" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2 )),
832832
"year" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 2), '=', 2)),
833833
"month" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 3), '=', 2)),
834834
"day" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 4), '=', 2)),
835+
"cast_YEAR" VARCHAR(200) AS (GET(VALUE,'c1')::string),
835836
"path" VARCHAR(255) AS (METADATA$FILENAME)
836837
)
837838
partition by ("type", "year", "month", "day", "path")
838-
location=@ADL_Azure_Storage_Account_Container_Name/
839+
location=@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/
839840
auto_refresh=false
841+
pattern='*.csv'
842+
file_format = (TYPE = JSON NULL_IF = () STRIP_OUTER_ARRAY = TRUE )
840843
;
841844
"""
842845
result_ext_table = DDLParser(ddl, normalize_names=True, debug=True).run(
@@ -901,6 +904,19 @@ def test_virtual_column_ext_table():
901904
"as": "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',4),'=',2)"
902905
},
903906
},
907+
{
908+
"name": "cast_YEAR",
909+
"type": "VARCHAR",
910+
"size": 200,
911+
"references": None,
912+
"unique": False,
913+
"nullable": True,
914+
"default": None,
915+
"check": None,
916+
"generated": {
917+
"as": "GET(VALUE,'c1') ::string"
918+
},
919+
},
904920
{
905921
"name": "path",
906922
"type": "VARCHAR",
@@ -924,12 +940,17 @@ def test_virtual_column_ext_table():
924940
"schema": "TABLE_DATA_SRC",
925941
"table_name": "EXT_PAYLOAD_MANIFEST_WEB",
926942
"tablespace": None,
927-
"replace": True,
928943
"external": True,
929944
"if_not_exists": True,
930-
"location": "@ADL_Azure_Storage_Account_Container_Name/",
945+
"location": "@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/",
931946
"table_properties": {
932947
"auto_refresh": False,
948+
"pattern": "'*.csv'",
949+
"file_format" : {
950+
"TYPE" : "JSON",
951+
"NULL_IF": "()",
952+
"STRIP_OUTER_ARRAY" : "TRUE",
953+
}
933954
},
934955
}
935956
]
@@ -943,7 +964,7 @@ def test_virtual_column_table():
943964
id bigint,
944965
derived bigint as (id * 10)
945966
)
946-
location = @Database.Schema.ADL_Azure_Storage_Account_Container_Name/entity
967+
location = @ADL_Azure_Storage_Account_Container_Name/entity
947968
auto_refresh = false
948969
file_format = (TYPE=JSON NULL_IF=('field') DATE_FORMAT=AUTO TRIM_SPACE=TRUE)
949970
stage_file_format = (TYPE=JSON NULL_IF=())
@@ -991,7 +1012,7 @@ def test_virtual_column_table():
9911012
"tablespace": None,
9921013
"replace": True,
9931014
"if_not_exists": True,
994-
"location": "ADL_Azure_Storage_Account_Container_Name/entity",
1015+
"location": "@ADL_Azure_Storage_Account_Container_Name/entity",
9951016
"table_properties": {
9961017
"auto_refresh": False,
9971018
"file_format": {

0 commit comments

Comments
 (0)