Skip to content

Commit 9638d3f

Browse files
committed
Updated GFF example
1 parent 64b2368 commit 9638d3f

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

src/gff_example.py

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
import os
2-
from typing import List, Optional
2+
from typing import List, Optional, Tuple
33

44
from unifair import runtime
5-
from unifair.compute.flow import FuncFlowTemplate
5+
from unifair.compute.flow import FuncFlowTemplate, LinearFlowTemplate
6+
from unifair.compute.task import TaskTemplate
67
from unifair.data.dataset import Dataset
78
from unifair.data.model import Model
89
from unifair.modules.general.tasks import import_directory, split_dataset
@@ -66,18 +67,33 @@ def attrib_df_names(dataset: Dataset[Model[object]]) -> List[str]:
6667
# Flow
6768
@FuncFlowTemplate
6869
def convert_gff_files(data: Dataset[Model[str]]) -> PandasDataset:
70+
data = import_directory('input/gff', suffix='.gff', model=Model[str])
71+
serialize_to_tarpacked_raw_files('1_data', data)
72+
6973
data_2 = slice_lines(data, start=0, end=1000)
74+
serialize_to_tarpacked_raw_files('2_data', data_2)
7075

7176
pd_data_3 = from_csv(data_2, delimiter='\t', first_row_as_col_names=False, col_names=GFF_COLS)
77+
serialize_to_tarpacked_csv_files('3_pd_data', pd_data_3)
78+
7279
pd_data_4 = extract_columns_as_files(pd_data_3, [ATTRIB_COL])
80+
serialize_to_tarpacked_csv_files('4_pd_data', pd_data_4)
81+
7382
pd_data_5_main, pd_data_3_attrib = split_dataset(pd_data_4, attrib_df_names(pd_data_4))
83+
serialize_to_tarpacked_csv_files('5_pd_data_main', pd_data_5_main)
7484

7585
data_6_attrib = to_csv(pd_data_3_attrib, first_row_as_col_names=False)
86+
serialize_to_tarpacked_raw_files('6_raw_data_attributes', data_6_attrib)
87+
7688
data_7_attrib = transform_all_lines_to_json(data_6_attrib)
89+
serialize_to_tarpacked_raw_files('7_raw_data_attributes', data_7_attrib)
90+
7791
data_8_attrib = transform_datafile_start_and_end_to_json(data_7_attrib)
92+
serialize_to_tarpacked_raw_files('8_raw_data_attributes', data_8_attrib)
7893

7994
data_9_attrib = Dataset[JsonTableOfStrings]()
8095
data_9_attrib.from_json(data_8_attrib.to_data())
96+
serialize_to_tarpacked_json_files('9_json_data_attributes', data_9_attrib)
8197

8298
pd_data_7_attrib = PandasDataset()
8399
pd_data_7_attrib.from_data(data_9_attrib.to_data())
@@ -86,17 +102,8 @@ def convert_gff_files(data: Dataset[Model[str]]) -> PandasDataset:
86102
[pd_data_5_main, pd_data_7_attrib],
87103
vertical=False,
88104
)
89-
90-
serialize_to_tarpacked_raw_files('1_data', data)
91-
serialize_to_tarpacked_raw_files('2_data', data_2)
92-
serialize_to_tarpacked_csv_files('3_pd_data', pd_data_3)
93-
serialize_to_tarpacked_csv_files('4_pd_data', pd_data_4)
94-
serialize_to_tarpacked_csv_files('5_pd_data_main', pd_data_5_main)
95-
serialize_to_tarpacked_raw_files('6_raw_data_attributes', data_6_attrib)
96-
serialize_to_tarpacked_raw_files('7_raw_data_attributes', data_7_attrib)
97-
serialize_to_tarpacked_raw_files('8_raw_data_attributes', data_8_attrib)
98-
serialize_to_tarpacked_json_files('9_json_data_attributes', data_9_attrib)
99105
serialize_to_tarpacked_csv_files('10_pd_data', pd_data_10)
106+
return pd_data_10
100107

101108

102109
@FuncFlowTemplate

0 commit comments

Comments (0)