11import os
2- from typing import List , Optional
2+ from typing import List , Optional , Tuple
33
44from unifair import runtime
5- from unifair .compute .flow import FuncFlowTemplate
5+ from unifair .compute .flow import FuncFlowTemplate , LinearFlowTemplate
6+ from unifair .compute .task import TaskTemplate
67from unifair .data .dataset import Dataset
78from unifair .data .model import Model
89from unifair .modules .general .tasks import import_directory , split_dataset
@@ -66,18 +67,33 @@ def attrib_df_names(dataset: Dataset[Model[object]]) -> List[str]:
6667# Flow
6768@FuncFlowTemplate
6869def convert_gff_files (data : Dataset [Model [str ]]) -> PandasDataset :
70+ data = import_directory ('input/gff' , suffix = '.gff' , model = Model [str ])
71+ serialize_to_tarpacked_raw_files ('1_data' , data )
72+
6973 data_2 = slice_lines (data , start = 0 , end = 1000 )
74+ serialize_to_tarpacked_raw_files ('2_data' , data_2 )
7075
7176 pd_data_3 = from_csv (data_2 , delimiter = '\t ' , first_row_as_col_names = False , col_names = GFF_COLS )
77+ serialize_to_tarpacked_csv_files ('3_pd_data' , pd_data_3 )
78+
7279 pd_data_4 = extract_columns_as_files (pd_data_3 , [ATTRIB_COL ])
80+ serialize_to_tarpacked_csv_files ('4_pd_data' , pd_data_4 )
81+
7382 pd_data_5_main , pd_data_3_attrib = split_dataset (pd_data_4 , attrib_df_names (pd_data_4 ))
83+ serialize_to_tarpacked_csv_files ('5_pd_data_main' , pd_data_5_main )
7484
7585 data_6_attrib = to_csv (pd_data_3_attrib , first_row_as_col_names = False )
86+ serialize_to_tarpacked_raw_files ('6_raw_data_attributes' , data_6_attrib )
87+
7688 data_7_attrib = transform_all_lines_to_json (data_6_attrib )
89+ serialize_to_tarpacked_raw_files ('7_raw_data_attributes' , data_7_attrib )
90+
7791 data_8_attrib = transform_datafile_start_and_end_to_json (data_7_attrib )
92+ serialize_to_tarpacked_raw_files ('8_raw_data_attributes' , data_8_attrib )
7893
7994 data_9_attrib = Dataset [JsonTableOfStrings ]()
8095 data_9_attrib .from_json (data_8_attrib .to_data ())
96+ serialize_to_tarpacked_json_files ('9_json_data_attributes' , data_9_attrib )
8197
8298 pd_data_7_attrib = PandasDataset ()
8399 pd_data_7_attrib .from_data (data_9_attrib .to_data ())
@@ -86,17 +102,8 @@ def convert_gff_files(data: Dataset[Model[str]]) -> PandasDataset:
86102 [pd_data_5_main , pd_data_7_attrib ],
87103 vertical = False ,
88104 )
89-
90- serialize_to_tarpacked_raw_files ('1_data' , data )
91- serialize_to_tarpacked_raw_files ('2_data' , data_2 )
92- serialize_to_tarpacked_csv_files ('3_pd_data' , pd_data_3 )
93- serialize_to_tarpacked_csv_files ('4_pd_data' , pd_data_4 )
94- serialize_to_tarpacked_csv_files ('5_pd_data_main' , pd_data_5_main )
95- serialize_to_tarpacked_raw_files ('6_raw_data_attributes' , data_6_attrib )
96- serialize_to_tarpacked_raw_files ('7_raw_data_attributes' , data_7_attrib )
97- serialize_to_tarpacked_raw_files ('8_raw_data_attributes' , data_8_attrib )
98- serialize_to_tarpacked_json_files ('9_json_data_attributes' , data_9_attrib )
99105 serialize_to_tarpacked_csv_files ('10_pd_data' , pd_data_10 )
106+ return pd_data_10
100107
101108
102109@FuncFlowTemplate
0 commit comments