|
1 | 1 | from typing import Any |
2 | 2 |
|
3 | 3 | import requests |
| 4 | +import pandas as pd |
4 | 5 |
|
5 | 6 | from unifair import runtime |
6 | 7 | from unifair.compute.flow import FuncFlowTemplate |
@@ -77,10 +78,38 @@ def to_pandas(dataset: Dataset[JsonTableOfStrings]) -> PandasDataset: |
77 | 78 |
|
@TaskTemplate
def pandas_magic(pandas: PandasDataset) -> PandasDataset:
    """Join the synonym, gene, keyword and result tables into a single table.

    Reads the tables ``results.genes.synonyms``, ``results.genes``,
    ``results.keywords`` and ``results`` from *pandas* and chains three right
    joins:

    1. synonyms -> genes, matching the synonym ``_unifair_ref`` against the
       gene ``_unifair_id``;
    2. the outcome -> keywords, on the (prefix-free) ``_unifair_ref``;
    3. the outcome -> results, matching ``_unifair_ref`` against the result
       ``_unifair_id``.

    The tables fetched from *pandas* are never modified; all clean-up is done
    on new frames.

    Returns:
        A new ``PandasDataset`` whose single table ``my_table`` has the
        columns ``synomym``, ``_unifair_ref``, ``category``, ``name``,
        ``_unifair_id``, ``primaryAccession`` and ``uniProtkbId``.
    """
    # Reduce the synonym foreign key to the bare gene id. The pattern is
    # anchored and escaped because `str.strip('results.genes.')` would strip
    # any of those *characters* from both ends instead of removing the prefix.
    synonyms = pandas['results.genes.synonyms']
    synonyms = synonyms.assign(
        _unifair_ref=synonyms['_unifair_ref'].str.replace(
            r'^results\.genes\.', '', regex=True))

    # Attach each synonym to its gene. Both frames carry `_unifair_ref`, so the
    # gene's own foreign key (pointing at the result) comes out as
    # `_unifair_ref_y`.
    genes = pandas['results.genes']
    gene_synonyms = synonyms.merge(
        genes, left_on='_unifair_ref', right_on='_unifair_id', how='right')
    # NOTE(review): 'synomym' looks like a typo for 'synonym'; it is kept
    # unchanged because it is part of the output table's schema.
    gene_synonyms = gene_synonyms.loc[:, ['value', '_unifair_ref_y']].rename(
        columns={'value': 'synomym', '_unifair_ref_y': '_unifair_ref'})
    # Reassign rather than calling `.replace(..., inplace=True)` on the column
    # selection, which is not guaranteed to write back to the frame.
    gene_synonyms = gene_synonyms.assign(
        _unifair_ref=gene_synonyms['_unifair_ref'].replace(
            r'^results\.', '', regex=True))

    # Keyword table: keep only the needed columns and clean the foreign key
    # the same way.
    keywords = pandas['results.keywords'].loc[:, ['_unifair_ref', 'category', 'name']]
    keywords = keywords.assign(
        _unifair_ref=keywords['_unifair_ref'].replace(
            r'^results\.', '', regex=True))

    # Combine synonyms with keywords through the shared result reference.
    synonyms_keywords = gene_synonyms.merge(keywords, on='_unifair_ref', how='right')

    # Add the primary accession and UniProtKB id from the results table.
    results = pandas['results'].loc[:, ['_unifair_id', 'primaryAccession', 'uniProtkbId']]
    merged = synonyms_keywords.merge(
        results, left_on='_unifair_ref', right_on='_unifair_id', how='right')

    out_dataset = PandasDataset()
    out_dataset['my_table'] = merged
    return out_dataset
85 | 114 |
|
86 | 115 |
|
|
0 commit comments