Skip to content

Commit 740864c

Browse files
MarvKler and Marvin Klerx authored
Convert parquet timestamps directly to usable iso time format (#40)
* fix: removed the duplicated removal of the first index / header
* convert parquet timestamps directly to a usable ISO time format instead of a nanosecond timestamp object
* fix ruff linter

---------

Co-authored-by: Marvin Klerx <marvin.klerx@imbus.de>
1 parent 11b0c91 commit 740864c

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

src/Tables/utils/file_reader.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,20 @@ def read_excel(self, path: Path, **kwargs) -> DataFrame:
230230

231231
def read_parquet(self, path: Path) -> DataFrame:
    """Load a parquet file and render its timestamp columns as ISO strings.

    The file is read with ``pd.read_parquet`` and handed to
    ``_parquet_transform_to_iso_timeformat``. The conversion is best
    effort: if it raises any ``Exception``, the file is read a second time
    and that unconverted frame is returned instead.

    Args:
        path: Location of the parquet file.

    Returns:
        The converted frame, or the frame exactly as read from disk when
        the conversion failed.
    """
    frame: DataFrame = pd.read_parquet(path)

    try:
        result = self._parquet_transform_to_iso_timeformat(frame)
    except Exception:
        # Read the file again rather than reusing ``frame`` so the caller
        # gets data the failed conversion did not touch.
        result = pd.read_parquet(path)
    return result
240+
241+
def _parquet_transform_to_iso_timeformat(self, df: DataFrame) -> DataFrame:
242+
ts_cols = df.select_dtypes(include=["datetime64[ns]", "datetime64[ns, UTC]"]).columns
243+
for col in ts_cols:
244+
df[col] = df[col].dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
245+
df[col] = df[col].str[:-3] + "Z"
246+
return df
234247

235248
def read_table_file(self, path: Path) -> DataFrame:
236249
"""

tests/atest/test_reader.robot

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,13 @@ Read Parquet File - Without Header
206206
${result} = BuiltIn.Evaluate "_time" not in "${content}"
207207
BuiltIn.Should Be True ${result}
208208

209+
Open Table - Parquet
# Opens example_03.parquet with Ignore Header set to False, then checks that
# the cell at ${data}[1][0] contains "2025" and that the table counts 1001 rows.
210+
Tables.Configure Ignore Header False
211+
${alias} = Tables.Open Table ${CURDIR}${/}testdata${/}example_03.parquet
212+
@{data} = Tables.Get Table
213+
Should Contain ${data}[1][0] 2025
214+
Tables.Count Table ${alias} Rows == ${1001}
215+
209216
Get Table Cell - Parquet
210217
Tables.Configure Ignore Header False
211218
Tables.Open Table ${CURDIR}${/}testdata${/}example_03.parquet

0 commit comments

Comments
 (0)