Skip to content

Commit 8d2ebbd

Browse files
committed
Made log loader more robust to different file formats or separators
1 parent a428df7 commit 8d2ebbd

File tree

1 file changed

+50
-6
lines changed
  • src/datalab_app_plugin_insitu/apps/xrd

1 file changed

+50
-6
lines changed

src/datalab_app_plugin_insitu/apps/xrd/utils.py

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,19 +99,27 @@ def process_local_xrd_data(
9999

100100
# Load the 1D data
101101
if log_path.exists():
102-
log_files = list(log_path.glob("*.csv"))
102+
# Look for both CSV and text files
103+
csv_files = list(log_path.glob("*.csv"))
104+
txt_files = list(log_path.glob("*.txt"))
105+
log_files = csv_files + txt_files
106+
103107
if len(log_files) > 1:
104108
raise ValueError(
105-
f"Log folder should contain exactly one CSV file: {log_path}. Found {len(log_files)} files. Files found: {log_files}"
109+
f"Log folder should contain exactly one data file: {log_path}. Found {len(log_files)} files. Files found: {log_files}"
106110
)
107111
# TODO handle multiple files
108112
elif len(log_files) == 0:
109-
raise ValueError(f"Log folder should contain at least one CSV file: {log_path}")
113+
raise ValueError(
114+
f"Log folder should contain at least one CSV or TXT file: {log_path}"
115+
)
110116
else:
111117
log_file = log_files[0]
112118

113119
else:
114-
raise FileNotFoundError(f"No log files found with extension .csv in {log_path}")
120+
raise FileNotFoundError(
121+
f"No log files found with extension .csv or .txt in {log_path}"
122+
)
115123

116124
try:
117125
if time_series_source == "echem":
@@ -315,6 +323,8 @@ def load_temperature_log_file(log_file: Path) -> pd.DataFrame:
315323
"""
316324
Load temperature log file and return as a DataFrame. This currently assumes the Temperature is recorded in Celsius.
317325
326+
Supports both CSV and text files with common delimiters (comma, tab, whitespace).
327+
318328
Args:
319329
log_file (Path): Path to the temperature log file, must contain scan_number and Temp as column headers.
320330
@@ -324,7 +334,23 @@ def load_temperature_log_file(log_file: Path) -> pd.DataFrame:
324334
if not log_file.exists():
325335
raise FileNotFoundError(f"Log file does not exist: {log_file}")
326336

327-
log_df = pd.read_csv(log_file)
337+
# Try to read the file with different delimiters
338+
try:
339+
# First try comma-separated (CSV)
340+
log_df = pd.read_csv(log_file, sep=",")
341+
except Exception:
342+
try:
343+
# Try tab-separated
344+
log_df = pd.read_csv(log_file, sep="\t")
345+
except Exception:
346+
try:
347+
# Try whitespace-separated
348+
log_df = pd.read_csv(log_file, sep=r"\s+")
349+
except Exception as e:
350+
raise ValueError(
351+
f"Failed to parse log file {log_file}. Tried comma, tab, and whitespace delimiters. Error: {str(e)}"
352+
)
353+
328354
if "scan_number" not in log_df.columns:
329355
raise ValueError("Log file must contain a 'scan_number' column.")
330356

@@ -338,6 +364,8 @@ def load_echem_log_file(log_file: Path) -> pd.DataFrame:
338364
"""
339365
Load electrochemical log file and return as a DataFrame.
340366
367+
Supports both CSV and text files with common delimiters (comma, tab, whitespace).
368+
341369
Args:
342370
log_file (Path): Path to the electrochemical log file, must contain scan_number, start_time and end_time as column headers.
343371
@@ -347,7 +375,23 @@ def load_echem_log_file(log_file: Path) -> pd.DataFrame:
347375
if not log_file.exists():
348376
raise FileNotFoundError(f"Log file does not exist: {log_file}")
349377

350-
log_df = pd.read_csv(log_file)
378+
# Try to read the file with different delimiters
379+
try:
380+
# First try comma-separated (CSV)
381+
log_df = pd.read_csv(log_file, sep=",")
382+
except Exception:
383+
try:
384+
# Try tab-separated
385+
log_df = pd.read_csv(log_file, sep="\t")
386+
except Exception:
387+
try:
388+
# Try whitespace-separated
389+
log_df = pd.read_csv(log_file, sep=r"\s+")
390+
except Exception as e:
391+
raise ValueError(
392+
f"Failed to parse log file {log_file}. Tried comma, tab, and whitespace delimiters. Error: {str(e)}"
393+
)
394+
351395
if "scan_number" not in log_df.columns:
352396
raise ValueError("Log file must contain a 'scan_number' column.")
353397

0 commit comments

Comments
 (0)