Skip to content

Commit cc7c566

Browse files
committed
Scripts for generating KGX for Automat deployment of COHD
1 parent cf76379 commit cc7c566

File tree

3 files changed

+789
-0
lines changed

3 files changed

+789
-0
lines changed

kgx/dump_cohd_mysql.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/env bash
#
# Dump the COHD / Biolink-mapping data needed to generate KGX files.
#
# Usage: ./dump_cohd_mysql.sh
#
# Prompts for the MySQL password, creates a directory named after today's date
# (YYYYMMDD) inside the current directory, and writes the output of each query
# to a .tsv file in that directory. `date` is printed before the first query
# and after every query as a coarse timing log.
#
# The password is passed to mysql through a private temporary option file
# (--defaults-extra-file) instead of --password=..., so it never appears in
# the process list. The file is removed when the script exits.

set -euo pipefail

readonly DB_HOST='tr-kp-clinical-db.ncats.io'
readonly DB_USER='admin'
readonly CONNECT_TIMEOUT=3600

# SELECT/JOIN part shared by every concept-pair-count dump; only the WHERE
# clause differs between the dumps. The joins are kept exactly as in the
# original single-line statements (line breaks are insignificant to SQL).
readonly COUNTS_SELECT="SELECT cp.concept_id_1, cp.concept_id_2,
 cc1.concept_count AS concept_1_count,
 cc2.concept_count AS concept_2_count,
 cp.concept_count AS concept_pair_count,
 cp.dataset_id
 FROM cohd.concept_pair_counts cp
 JOIN biolink.mappings bm1 ON cp.concept_id_1 = bm1.omop_id
 JOIN biolink.mappings bm2 ON cp.concept_id_2 = bm2.omop_id
 JOIN cohd.concept_counts cc1 ON cp.concept_id_1 = cc1.concept_id AND cp.dataset_id = cc1.dataset_id
 JOIN cohd.concept_counts cc2 ON cp.concept_id_2 = cc2.concept_id AND cp.dataset_id = cc2.dataset_id
 JOIN cohd.patient_count pc ON cp.dataset_id = pc.dataset_id
 JOIN cohd.concept c1 ON cp.concept_id_1 = c1.concept_id
 JOIN cohd.concept c2 ON cp.concept_id_2 = c2.concept_id"

# Path of the temporary mysql option file; empty until it has been created.
credentials_file=''

#######################################
# Remove the temporary option file, if one was created.
# Globals:   credentials_file (read)
#######################################
cleanup() {
  if [[ -n "$credentials_file" ]]; then
    rm -f -- "$credentials_file"
  fi
}

#######################################
# Write a mysql option file containing the password.
# Globals:   credentials_file (written)
# Arguments: $1 - the MySQL password
#######################################
write_credentials_file() {
  local secret=$1

  # Inside a double-quoted option-file value, backslash starts an escape
  # sequence, so backslashes and double quotes must be escaped.
  secret=${secret//\\/\\\\}
  secret=${secret//\"/\\\"}

  # mktemp creates the file readable and writable by the owner only.
  credentials_file=$(mktemp)
  printf '[client]\npassword="%s"\n' "$secret" > "$credentials_file"
}

#######################################
# Run one SQL statement and save its output, then print a timestamp.
# Globals:   credentials_file, DB_HOST, DB_USER, CONNECT_TIMEOUT (read)
# Arguments: $1 - SQL statement
#            $2 - output file
#######################################
run_query() {
  local sql=$1
  local out_file=$2

  # --defaults-extra-file must be the first option on the command line.
  mysql --defaults-extra-file="$credentials_file" \
    -h "$DB_HOST" -u "$DB_USER" \
    --connect-timeout="$CONNECT_TIMEOUT" \
    -e "$sql" > "$out_file"
  date
}

#######################################
# Dump concept pair counts restricted by the given WHERE clause.
# Globals:   COUNTS_SELECT (read)
# Arguments: $1 - WHERE clause (without the WHERE keyword or trailing ';')
#            $2 - output file
#######################################
dump_counts() {
  local where_clause=$1
  local out_file=$2

  run_query "${COUNTS_SELECT} WHERE ${where_clause};" "$out_file"
}

main() {
  local password dir_name

  # Prompt for MySQL password. -r and IFS= keep backslashes and surrounding
  # whitespace in the password intact; -s suppresses echoing.
  IFS= read -rsp "Enter MySQL password: " password
  echo # Move to a new line after input

  trap cleanup EXIT
  write_credentials_file "$password"
  unset password

  # Create output directory from today's date. -p lets a same-day rerun reuse
  # the existing directory; under `set -e` a failed cd aborts the script
  # instead of dumping into the wrong directory.
  dir_name=$(date +%Y%m%d)
  mkdir -p -- "$dir_name"
  cd -- "$dir_name"

  date

  # Dump out mapping and concept data
  run_query "SELECT * FROM biolink.mappings;" mappings.tsv
  run_query "SELECT concept_id, domain_id, concept_name FROM cohd.concept; " concepts.tsv

  # Dump dataset 1 (takes about 2 minutes)
  dump_counts "((c1.domain_id != 'Drug' AND c2.domain_id != 'Drug') AND cp.dataset_id = 1)" counts_ds1.tsv

  # Split up dataset 3 by domain pairs. The following all work, 5-20 min each
  dump_counts "c1.domain_id = 'Condition' AND c2.domain_id = 'Drug' AND cp.dataset_id = 3" counts_cd.tsv
  dump_counts "c1.domain_id = 'Drug' AND c2.domain_id = 'Condition' AND cp.dataset_id = 3" counts_dc.tsv
  dump_counts "c1.domain_id = 'Drug' AND c2.domain_id = 'Drug' AND cp.dataset_id = 3" counts_dd.tsv
  dump_counts "c1.domain_id = 'Drug' AND c2.domain_id = 'Procedure' AND cp.dataset_id = 3" counts_dp.tsv
  dump_counts "c1.domain_id = 'Procedure' AND c2.domain_id = 'Drug' AND cp.dataset_id = 3" counts_pd.tsv
}

main "$@"

0 commit comments

Comments
 (0)