Skip to content
This repository was archived by the owner on Jan 13, 2023. It is now read-only.

Commit 7d0725d

Browse files
committed
support general cohort analytics
1 parent 8155384 commit 7d0725d

File tree

4 files changed

+388
-4
lines changed

4 files changed

+388
-4
lines changed

autoimmune.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,4 @@ def get_concepts(output_file):
6969

7070

7171
if __name__ == "__main__":
72-
get_concepts('autoimmune_results_v2.csv')
72+
get_concepts('autoimmune_results_v3.csv')

cohortanalysis.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import utils
2+
import sqldbutils as dutil
3+
import json
4+
import ontotextapi as oi
5+
6+
# SQL: list the distinct concept names stored in the concept mapping table
autoimmune_concepts_sql = """
 select distinct concept_name from [SQLCRIS_User].[Kconnect].[ulms_concept_mapping]
"""

# SQL template: demographic/diagnosis rows of one cohort.
# '{}' is filled with the patient_group value via str.format.
patients_sql = """
 select brcid, primary_diag, diagnosis_date, dob, gender_id, ethnicitycleaned from
 [SQLCRIS_User].[Kconnect].[cohorts]
 where patient_group='{}'
"""

# SQL template: for each patient of a cohort, the number of distinct documents
# annotated with one concept. {0} is the annotation inst_uri, {1} the
# patient_group.
# The selected column must use the cohorts alias `c`: it is the alias declared
# in FROM and used in GROUP BY (the earlier `p.brcid` referenced an alias that
# does not exist in this statement).
concept_doc_freq_sql = """
 select c.brcid, COUNT(distinct a.CN_Doc_ID) num
 from [SQLCRIS_User].[Kconnect].[cohorts] c, [SQLCRIS_User].Kconnect.kconnect_annotations a, GateDB_Cris.dbo.gate d
 where
 a.inst_uri='{0}'
 and a.CN_Doc_ID = d.CN_Doc_ID
 and c.brcid = d.BrcId
 and c.patient_group='{1}'
 group by c.brcid
"""
29+
30+
31+
def populate_patient_concept_table(cohort_name, concepts, out_file):
    """Write a tab-separated patient-by-concept document-frequency table.

    For every concept id in ``concepts`` the per-patient count of distinct
    annotated documents is queried for the cohort ``cohort_name``. Concepts
    that return no rows at all are dropped. The table has one row per cohort
    patient and one column per remaining concept, ordered by concept label;
    a patient with no documents for a concept gets ``0``.

    Args:
        cohort_name: value substituted for ``patient_group`` in the SQL
            templates.
        concepts: iterable of concept ids, substituted for ``inst_uri``.
        out_file: path handed to ``utils.save_string``.
    """
    # NOTE(review): cohort_name and the concept ids are interpolated directly
    # into SQL text; only call this with trusted values.
    patients = []
    dutil.query_data(patients_sql.format(cohort_name), patients)

    # (brcid, concept id) -> number of distinct documents
    doc_freq = {}
    non_empty_concepts = []
    for c in concepts:
        patient_concept_freq = []
        dutil.query_data(concept_doc_freq_sql.format(c, cohort_name), patient_concept_freq)
        if not patient_concept_freq:
            continue
        non_empty_concepts.append(c)
        for pc in patient_concept_freq:
            doc_freq[(pc['brcid'], c)] = pc['num']

    # Pair every concept with its label so that header and row cells share
    # one ordering. Pairs (rather than a label -> id dict) keep concepts that
    # happen to share a label; the id stands in when no label is available.
    labelled = []
    for c in non_empty_concepts:
        label = oi.get_concept_label(c)
        labelled.append((label if label is not None else c, c))
    labelled.sort()

    lines = [u'\t'.join([u'brcid'] + [u'{}'.format(label) for label, _ in labelled])]
    for p in patients:
        brcid = p['brcid']
        cells = [brcid] + [doc_freq.get((brcid, cid), 0) for _, cid in labelled]
        # ids and counts are not necessarily strings; format each cell
        lines.append(u'\t'.join(u'{}'.format(v) for v in cells))
    utils.save_string(u'\n'.join(lines) + u'\n', out_file)
    print('done')
58+
59+
60+
if __name__ == "__main__":
    # Script entry: tabulate the cardiovascular concepts for the
    # 'valproic acid patients' cohort into heart_cohorts.csv.
    concepts = utils.load_json_data(
        './resources/cardiovascular_concepts.json')
    populate_patient_concept_table(
        'valproic acid patients',
        concepts,
        'heart_cohorts.csv')

ontotextapi.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
}}
2424
"""
2525

26+
# SPARQL template selecting only the skos:narrower targets of one concept.
# '{}' takes the concept id appended to the linkedlifedata UMLS id namespace;
# the doubled braces are str.format escapes for the literal SPARQL braces.
subconcepts_only_query_tmp = """
SELECT ?inst_full WHERE {{
<http://linkedlifedata.com/resource/umls/id/{}> <http://www.w3.org/2004/02/skos/core#narrower> ?inst_full .
}}
"""
31+
2632
# mimir query template
2733
mimir_query_temp = """
2834
{{Mention sparql = "{}"}}
@@ -76,7 +82,8 @@ def generate_prospector_query(concept_id, sparql_only=None):
7682

7783
# query to get all instances of a concept
def query_instances(concept_id):
    """Return the ``inst_full`` value of every row the subconcept query yields.

    The query text is ``subconcepts_only_query_tmp`` filled with
    ``concept_id`` and is executed through ``query_result``.
    """
    # superseded: generate_prospector_query(concept_id, sparql_only=True)
    sparql = subconcepts_only_query_tmp.format(concept_id)
    instances = []
    for binding in query_result(sparql):
        instances.append(binding['inst_full']['value'])
    return instances
@@ -89,9 +96,10 @@ def get_all_instances(save_file):
8996
if concepts[c] == '':
9097
continue
9198
insts = query_instances(concepts[c])
99+
insts = [concepts[c]] + insts
92100
print u'{}\t{}\t{}\t{}'.format(c, concepts[c], len(insts), json.dumps(insts))
93101
for cid in insts:
94-
concpet2subconcepts_csv += u'"{}", "{}"\n'.format(c, cid[cid.rfind('/')+1:])
102+
concpet2subconcepts_csv += u'{}, {}\n'.format(c, cid[cid.rfind('/')+1:])
95103
if save_file is not None:
96104
utils.save_string(concpet2subconcepts_csv, save_file)
97105

@@ -110,9 +118,26 @@ def generate_all_queries():
110118
utils.save_json_array(concept2queries, './resources/mimir_queries.json')
111119

112120

121+
def get_concept_label(concept_id):
    """Return the first English preferred label found for ``concept_id``.

    Runs a SPARQL query (via ``query_result``) that follows skos-xl
    ``prefLabel`` -> ``literalForm`` and keeps labels whose language tag is
    "en". Returns ``None`` when the query yields no label.
    """
    label_query = """
    select ?label where {{
    <http://linkedlifedata.com/resource/umls/id/{}> <http://www.w3.org/2008/05/skos-xl#prefLabel> ?labelObj .
    ?labelObj <http://www.w3.org/2008/05/skos-xl#literalForm> ?label .
    FILTER ( lang(?label) = "en" )
    }}
    """.format(concept_id)
    bindings = query_result(label_query)
    found = [row['label']['value'] for row in bindings]
    return found[0] if found else None
136+
113137
def main():
    """Print the label of a sample concept (C0018799)."""
    # Alternative entry points, currently disabled:
    # generate_all_queries()
    # get_all_instances('./resources/all_insts.csv')
    label = get_concept_label('C0018799')
    print(label)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)