Skip to content

Commit 19da0bc

Browse files
jCHENEBY-zzsveinugu
authored and committed
uniprot pandas output is now getting relevant info
1 parent 9638d3f commit 19da0bc

File tree

1 file changed

+32
-3
lines changed

1 file changed

+32
-3
lines changed

src/uniprot_example.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import Any
22

33
import requests
4+
import pandas as pd
45

56
from unifair import runtime
67
from unifair.compute.flow import FuncFlowTemplate
@@ -77,10 +78,38 @@ def to_pandas(dataset: Dataset[JsonTableOfStrings]) -> PandasDataset:
7778

7879
@TaskTemplate
def pandas_magic(pandas: PandasDataset) -> PandasDataset:
    """Join the synonym, gene, keyword and result tables into a single table.

    Reads the tables 'results.genes.synonyms', 'results.genes',
    'results.keywords' and 'results' from ``pandas``, removes the table-name
    prefixes from their ``_unifair_ref`` foreign keys and chains three right
    joins (synonyms -> genes -> keywords -> results).

    Args:
        pandas: Dataset holding the four tables named above.
            NOTE(review): assumes each entry is a ``pandas.DataFrame`` with
            string-valued ``_unifair_id`` / ``_unifair_ref`` columns -- confirm
            against the producer of this dataset.

    Returns:
        A new ``PandasDataset`` whose only entry, 'my_table', is the joined
        frame.

    The input tables are left untouched: cleaned keys are written to new
    frames via ``assign`` instead of being patched in place.
    """
    # Drop the leading 'results.genes.' from the synonym foreign key.
    # str.strip() is not usable here: it treats its argument as a *set of
    # characters* and strips them from both ends, so it could also eat into
    # the id itself. An anchored pattern removes exactly the prefix.
    df_synonym = pandas['results.genes.synonyms']
    df_synonym = df_synonym.assign(
        _unifair_ref=df_synonym['_unifair_ref'].str.replace(
            r'^results\.genes\.', '', regex=True
        )
    )

    # Join synonyms onto genes to pick up each gene's own foreign key.
    df_gene = pandas['results.genes']
    df_merge_1 = pd.merge(
        df_synonym,
        df_gene,
        left_on='_unifair_ref',
        right_on='_unifair_id',
        how='right',
    )
    # Both inputs carry '_unifair_ref'; the merge suffixes the gene table's
    # column with '_y', which is the one kept as the new foreign key.
    # NOTE(review): 'synomym' looks like a typo for 'synonym' but it is part
    # of the output schema, so it is kept as-is -- rename together with any
    # consumers.
    df_merge_1 = df_merge_1.loc[:, ['value', '_unifair_ref_y']].rename(
        columns={'value': 'synomym', '_unifair_ref_y': '_unifair_ref'}
    )
    # Assign the cleaned key back explicitly. Calling replace(inplace=True) on
    # a selected column does not reliably update the parent frame (it is a
    # no-op under pandas Copy-on-Write). The dot is escaped and the pattern
    # anchored so only a leading literal 'results.' is removed.
    df_merge_1 = df_merge_1.assign(
        _unifair_ref=df_merge_1['_unifair_ref'].replace(
            r'^results\.', '', regex=True
        )
    )

    # Keywords table: clean the foreign key the same way, keep only the
    # columns needed downstream.
    df_keywords = pandas['results.keywords']
    df_keywords = df_keywords.assign(
        _unifair_ref=df_keywords['_unifair_ref'].replace(
            r'^results\.', '', regex=True
        )
    ).loc[:, ['_unifair_ref', 'category', 'name']]

    # Merge keywords with synonyms.
    df_merge_2 = pd.merge(df_merge_1, df_keywords, on='_unifair_ref', how='right')

    # Results table supplies the primary accession and UniProtKB id.
    df_results = pandas['results'].loc[
        :, ['_unifair_id', 'primaryAccession', 'uniProtkbId']
    ]
    df_merge_final = pd.merge(
        df_merge_2,
        df_results,
        left_on='_unifair_ref',
        right_on='_unifair_id',
        how='right',
    )

    out_dataset = PandasDataset()
    out_dataset['my_table'] = df_merge_final
    return out_dataset
85114

86115

0 commit comments

Comments
 (0)