Skip to content

Commit 1bbbe24

Browse files
committed
Add score property to edge. Swap positions of ln_ratio and rel freq result objects to match current COHD API attribute ordering
1 parent 1a25455 commit 1bbbe24

File tree

1 file changed

+43
-41
lines changed

1 file changed

+43
-41
lines changed

kgx/kgx_cohd.py

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def clip(x, clip):
235235

236236
# Read OMOP concept definitions
237237
omop_concepts = dict()
238-
with open(path.join(DIR_DATA, 'concepts.tsv'), 'r') as f_concepts:
238+
with open(path.join(DIR_DATA, 'concepts.tsv'), 'r', encoding='utf-8') as f_concepts:
239239
# skip header line
240240
f_concepts.readline()
241241
while line := f_concepts.readline():
@@ -388,6 +388,7 @@ def clip(x, clip):
388388
rel_freq_study_value = f"Relative to {curie_subj}: {rel_freq_subj_value:.3f} [{rel_freq_subj_ci_value[0]:.3f}, {rel_freq_subj_ci_value[1]:.3f}]; " \
389389
f"Relative to {curie_obj}: {rel_freq_obj_value:.3f} [{rel_freq_obj_ci_value[0]:.3f}, {rel_freq_obj_ci_value[1]:.3f}]"
390390
log_odds_study_value = f"{lo:.3f} [{lo_ci[0]:.3f}, {lo_ci[1]:.3f}]"
391+
score = lnr_ci[0] if lnr > 0 else -lnr_ci[1]
391392

392393
# Build attributes
393394
attributes = [
@@ -514,42 +515,34 @@ def clip(x, clip):
514515
{
515516
"attribute_source": INFORES_ID,
516517
"attribute_type_id": "biolink:has_supporting_study_result",
517-
"description": "A study result describing a relative frequency anaylsis on a single pair of concepts",
518-
"value": rel_freq_study_value,
519-
"value_type_id": "biolink:RelativeFrequencyAnalysisResult",
518+
"description": "A study result describing an observed-expected frequency anaylsis on a single pair of concepts",
519+
"value": oefr_study_value,
520+
"value_type_id": "biolink:ObservedExpectedFrequencyAnalysisResult",
520521
'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/COHD-KP',
521522
"attributes": [
522523
{
523-
'attribute_type_id': 'biolink:relative_frequency_subject',
524-
'original_attribute_name': 'relative_frequency_subject',
525-
'value': rel_freq_subj_value,
526-
'value_type_id': 'EDAM:data_1772', # Score
527-
'attribute_source': INFORES_ID,
528-
'description': f'Relative frequency, relative to the subject node ({curie_subj}).'
529-
},
530-
{
531-
'attribute_type_id': 'biolink:relative_frequency_subject_confidence_interval',
532-
'original_attribute_name': 'relative_freq_subject_confidence_interval',
533-
'value': rel_freq_subj_ci_value,
534-
'value_type_id': 'EDAM:data_0951', # Statistical estimate score
524+
'attribute_type_id': 'biolink:expected_count',
525+
'original_attribute_name': 'expected_count',
526+
'value': count_expected,
527+
'value_type_id': 'EDAM:operation_3438',
535528
'attribute_source': INFORES_ID,
536-
'description': f'Relative frequency (subject) {CONFIDENCE*100}% confidence interval'
529+
'description': 'Calculated expected count of concept pair.'
537530
},
538531
{
539-
'attribute_type_id': 'biolink:relative_frequency_object',
540-
'original_attribute_name': 'relative_frequency_object',
541-
'value': rel_freq_obj_value,
532+
'attribute_type_id': 'biolink:ln_ratio',
533+
'original_attribute_name': 'ln_ratio',
534+
'value': lnr,
542535
'value_type_id': 'EDAM:data_1772', # Score
543536
'attribute_source': INFORES_ID,
544-
'description': f'Relative frequency, relative to the object node ({curie_obj}).'
537+
'description': 'Observed-expected frequency ratio.'
545538
},
546539
{
547-
'attribute_type_id': 'biolink:relative_frequency_object_confidence_interval',
548-
'original_attribute_name': 'relative_freq_object_confidence_interval',
549-
'value': rel_freq_obj_ci_value,
540+
'attribute_type_id': 'biolink:ln_ratio_confidence_interval',
541+
'original_attribute_name': 'ln_ratio_confidence_interval',
542+
'value': lnr_ci,
550543
'value_type_id': 'EDAM:data_0951', # Statistical estimate score
551544
'attribute_source': INFORES_ID,
552-
'description': f'Relative frequency (object) {CONFIDENCE*100}% confidence interval'
545+
'description': f'Observed-expected frequency ratio {CONFIDENCE*100}% confidence interval'
553546
},
554547
{
555548
'attribute_type_id': 'biolink:supporting_data_set', # Database ID
@@ -576,34 +569,42 @@ def clip(x, clip):
576569
{
577570
"attribute_source": INFORES_ID,
578571
"attribute_type_id": "biolink:has_supporting_study_result",
579-
"description": "A study result describing an observed-expected frequency anaylsis on a single pair of concepts",
580-
"value": oefr_study_value,
581-
"value_type_id": "biolink:ObservedExpectedFrequencyAnalysisResult",
572+
"description": "A study result describing a relative frequency anaylsis on a single pair of concepts",
573+
"value": rel_freq_study_value,
574+
"value_type_id": "biolink:RelativeFrequencyAnalysisResult",
582575
'value_url': 'https://github.com/NCATSTranslator/Translator-All/wiki/COHD-KP',
583576
"attributes": [
584577
{
585-
'attribute_type_id': 'biolink:expected_count',
586-
'original_attribute_name': 'expected_count',
587-
'value': count_expected,
588-
'value_type_id': 'EDAM:operation_3438',
578+
'attribute_type_id': 'biolink:relative_frequency_subject',
579+
'original_attribute_name': 'relative_frequency_subject',
580+
'value': rel_freq_subj_value,
581+
'value_type_id': 'EDAM:data_1772', # Score
589582
'attribute_source': INFORES_ID,
590-
'description': 'Calculated expected count of concept pair.'
583+
'description': f'Relative frequency, relative to the subject node ({curie_subj}).'
591584
},
592585
{
593-
'attribute_type_id': 'biolink:ln_ratio',
594-
'original_attribute_name': 'ln_ratio',
595-
'value': lnr,
586+
'attribute_type_id': 'biolink:relative_frequency_subject_confidence_interval',
587+
'original_attribute_name': 'relative_freq_subject_confidence_interval',
588+
'value': rel_freq_subj_ci_value,
589+
'value_type_id': 'EDAM:data_0951', # Statistical estimate score
590+
'attribute_source': INFORES_ID,
591+
'description': f'Relative frequency (subject) {CONFIDENCE*100}% confidence interval'
592+
},
593+
{
594+
'attribute_type_id': 'biolink:relative_frequency_object',
595+
'original_attribute_name': 'relative_frequency_object',
596+
'value': rel_freq_obj_value,
596597
'value_type_id': 'EDAM:data_1772', # Score
597598
'attribute_source': INFORES_ID,
598-
'description': 'Observed-expected frequency ratio.'
599+
'description': f'Relative frequency, relative to the object node ({curie_obj}).'
599600
},
600601
{
601-
'attribute_type_id': 'biolink:ln_ratio_confidence_interval',
602-
'original_attribute_name': 'ln_ratio_confidence_interval',
603-
'value': lnr_ci,
602+
'attribute_type_id': 'biolink:relative_frequency_object_confidence_interval',
603+
'original_attribute_name': 'relative_freq_object_confidence_interval',
604+
'value': rel_freq_obj_ci_value,
604605
'value_type_id': 'EDAM:data_0951', # Statistical estimate score
605606
'attribute_source': INFORES_ID,
606-
'description': f'Observed-expected frequency ratio {CONFIDENCE*100}% confidence interval'
607+
'description': f'Relative frequency (object) {CONFIDENCE*100}% confidence interval'
607608
},
608609
{
609610
'attribute_type_id': 'biolink:supporting_data_set', # Database ID
@@ -690,6 +691,7 @@ def clip(x, clip):
690691
'subject': biolink_id_1,
691692
'object': biolink_id_2,
692693
'predicate': predicate,
694+
'score': score,
693695
'attributes': attributes,
694696
'sources': [
695697
{

0 commit comments

Comments
 (0)