@@ -235,7 +235,7 @@ def clip(x, clip):
235235
236236# Read OMOP concept definitions
237237omop_concepts = dict ()
238- with open (path .join (DIR_DATA , 'concepts.tsv' ), 'r' ) as f_concepts :
238+ with open (path .join (DIR_DATA , 'concepts.tsv' ), 'r' , encoding = 'utf-8' ) as f_concepts :
239239 # skip header line
240240 f_concepts .readline ()
241241 while line := f_concepts .readline ():
@@ -388,6 +388,7 @@ def clip(x, clip):
388388 rel_freq_study_value = f"Relative to { curie_subj } : { rel_freq_subj_value :.3f} [{ rel_freq_subj_ci_value [0 ]:.3f} , { rel_freq_subj_ci_value [1 ]:.3f} ]; " \
389389 f"Relative to { curie_obj } : { rel_freq_obj_value :.3f} [{ rel_freq_obj_ci_value [0 ]:.3f} , { rel_freq_obj_ci_value [1 ]:.3f} ]"
390390 log_odds_study_value = f"{ lo :.3f} [{ lo_ci [0 ]:.3f} , { lo_ci [1 ]:.3f} ]"
391+ score = lnr_ci [0 ] if lnr > 0 else - lnr_ci [1 ]
391392
392393 # Build attributes
393394 attributes = [
@@ -514,42 +515,34 @@ def clip(x, clip):
514515 {
515516 "attribute_source" : INFORES_ID ,
516517 "attribute_type_id" : "biolink:has_supporting_study_result" ,
517- "description" : "A study result describing a relative frequency anaylsis on a single pair of concepts" ,
518- "value" : rel_freq_study_value ,
519- "value_type_id" : "biolink:RelativeFrequencyAnalysisResult " ,
518+ "description" : "A study result describing an observed-expected frequency anaylsis on a single pair of concepts" ,
519+ "value" : oefr_study_value ,
520+ "value_type_id" : "biolink:ObservedExpectedFrequencyAnalysisResult " ,
520521 'value_url' : 'https://github.com/NCATSTranslator/Translator-All/wiki/COHD-KP' ,
521522 "attributes" : [
522523 {
523- 'attribute_type_id' : 'biolink:relative_frequency_subject' ,
524- 'original_attribute_name' : 'relative_frequency_subject' ,
525- 'value' : rel_freq_subj_value ,
526- 'value_type_id' : 'EDAM:data_1772' , # Score
527- 'attribute_source' : INFORES_ID ,
528- 'description' : f'Relative frequency, relative to the subject node ({ curie_subj } ).'
529- },
530- {
531- 'attribute_type_id' : 'biolink:relative_frequency_subject_confidence_interval' ,
532- 'original_attribute_name' : 'relative_freq_subject_confidence_interval' ,
533- 'value' : rel_freq_subj_ci_value ,
534- 'value_type_id' : 'EDAM:data_0951' , # Statistical estimate score
524+ 'attribute_type_id' : 'biolink:expected_count' ,
525+ 'original_attribute_name' : 'expected_count' ,
526+ 'value' : count_expected ,
527+ 'value_type_id' : 'EDAM:operation_3438' ,
535528 'attribute_source' : INFORES_ID ,
536- 'description' : f'Relative frequency (subject) { CONFIDENCE * 100 } % confidence interval '
529+ 'description' : 'Calculated expected count of concept pair. '
537530 },
538531 {
539- 'attribute_type_id' : 'biolink:relative_frequency_object ' ,
540- 'original_attribute_name' : 'relative_frequency_object ' ,
541- 'value' : rel_freq_obj_value ,
532+ 'attribute_type_id' : 'biolink:ln_ratio ' ,
533+ 'original_attribute_name' : 'ln_ratio ' ,
534+ 'value' : lnr ,
542535 'value_type_id' : 'EDAM:data_1772' , # Score
543536 'attribute_source' : INFORES_ID ,
544- 'description' : f'Relative frequency, relative to the object node ( { curie_obj } ) .'
537+ 'description' : 'Observed-expected frequency ratio .'
545538 },
546539 {
547- 'attribute_type_id' : 'biolink:relative_frequency_object_confidence_interval ' ,
548- 'original_attribute_name' : 'relative_freq_object_confidence_interval ' ,
549- 'value' : rel_freq_obj_ci_value ,
540+ 'attribute_type_id' : 'biolink:ln_ratio_confidence_interval ' ,
541+ 'original_attribute_name' : 'ln_ratio_confidence_interval ' ,
542+ 'value' : lnr_ci ,
550543 'value_type_id' : 'EDAM:data_0951' , # Statistical estimate score
551544 'attribute_source' : INFORES_ID ,
552- 'description' : f'Relative frequency (object) { CONFIDENCE * 100 } % confidence interval'
545+ 'description' : f'Observed-expected frequency ratio { CONFIDENCE * 100 } % confidence interval'
553546 },
554547 {
555548 'attribute_type_id' : 'biolink:supporting_data_set' , # Database ID
@@ -576,34 +569,42 @@ def clip(x, clip):
576569 {
577570 "attribute_source" : INFORES_ID ,
578571 "attribute_type_id" : "biolink:has_supporting_study_result" ,
579- "description" : "A study result describing an observed-expected frequency anaylsis on a single pair of concepts" ,
580- "value" : oefr_study_value ,
581- "value_type_id" : "biolink:ObservedExpectedFrequencyAnalysisResult " ,
572+ "description" : "A study result describing a relative frequency anaylsis on a single pair of concepts" ,
573+ "value" : rel_freq_study_value ,
574+ "value_type_id" : "biolink:RelativeFrequencyAnalysisResult " ,
582575 'value_url' : 'https://github.com/NCATSTranslator/Translator-All/wiki/COHD-KP' ,
583576 "attributes" : [
584577 {
585- 'attribute_type_id' : 'biolink:expected_count ' ,
586- 'original_attribute_name' : 'expected_count ' ,
587- 'value' : count_expected ,
588- 'value_type_id' : 'EDAM:operation_3438' ,
578+ 'attribute_type_id' : 'biolink:relative_frequency_subject ' ,
579+ 'original_attribute_name' : 'relative_frequency_subject ' ,
580+ 'value' : rel_freq_subj_value ,
581+ 'value_type_id' : 'EDAM:data_1772' , # Score
589582 'attribute_source' : INFORES_ID ,
590- 'description' : 'Calculated expected count of concept pair .'
583+ 'description' : f'Relative frequency, relative to the subject node ( { curie_subj } ) .'
591584 },
592585 {
593- 'attribute_type_id' : 'biolink:ln_ratio' ,
594- 'original_attribute_name' : 'ln_ratio' ,
595- 'value' : lnr ,
586+ 'attribute_type_id' : 'biolink:relative_frequency_subject_confidence_interval' ,
587+ 'original_attribute_name' : 'relative_freq_subject_confidence_interval' ,
588+ 'value' : rel_freq_subj_ci_value ,
589+ 'value_type_id' : 'EDAM:data_0951' , # Statistical estimate score
590+ 'attribute_source' : INFORES_ID ,
591+ 'description' : f'Relative frequency (subject) { CONFIDENCE * 100 } % confidence interval'
592+ },
593+ {
594+ 'attribute_type_id' : 'biolink:relative_frequency_object' ,
595+ 'original_attribute_name' : 'relative_frequency_object' ,
596+ 'value' : rel_freq_obj_value ,
596597 'value_type_id' : 'EDAM:data_1772' , # Score
597598 'attribute_source' : INFORES_ID ,
598- 'description' : 'Observed-expected frequency ratio .'
599+ 'description' : f'Relative frequency, relative to the object node ( { curie_obj } ) .'
599600 },
600601 {
601- 'attribute_type_id' : 'biolink:ln_ratio_confidence_interval ' ,
602- 'original_attribute_name' : 'ln_ratio_confidence_interval ' ,
603- 'value' : lnr_ci ,
602+ 'attribute_type_id' : 'biolink:relative_frequency_object_confidence_interval ' ,
603+ 'original_attribute_name' : 'relative_freq_object_confidence_interval ' ,
604+ 'value' : rel_freq_obj_ci_value ,
604605 'value_type_id' : 'EDAM:data_0951' , # Statistical estimate score
605606 'attribute_source' : INFORES_ID ,
606- 'description' : f'Observed-expected frequency ratio { CONFIDENCE * 100 } % confidence interval'
607+ 'description' : f'Relative frequency (object) { CONFIDENCE * 100 } % confidence interval'
607608 },
608609 {
609610 'attribute_type_id' : 'biolink:supporting_data_set' , # Database ID
@@ -690,6 +691,7 @@ def clip(x, clip):
690691 'subject' : biolink_id_1 ,
691692 'object' : biolink_id_2 ,
692693 'predicate' : predicate ,
694+ 'score' : score ,
693695 'attributes' : attributes ,
694696 'sources' : [
695697 {
0 commit comments