Skip to content

Commit 3e2dd8b

Browse files
committed
feat: new chebi annotations
First, chebi patterns of existing annotations are either: 1) removed if they came from Sco4 (many were wrong), or 2) reformatted to the pattern CHEBI:\d+. Closes one of the tasks in #30. Then, I added new chebi annotations to metabolites based on the metanetx id, and used a library called libchebipy to avoid secondary IDs. Still (only) 202 metabolites without chebi annotation.
1 parent 74fc6c8 commit 3e2dd8b

File tree

6 files changed

+5675
-3054
lines changed

6 files changed

+5675
-3054
lines changed

ComplementaryData/curation/chebi_annotation.csv

Lines changed: 1824 additions & 0 deletions
Large diffs are not rendered by default.

ComplementaryScripts/consensusModel/fix_issue33_annotation_bugs.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ def fix_misc(scoGEM):
9999
scoGEM.metabolites.get_by_id("4gglutbut_c").annotation["kegg.compound"] = "C15700"
100100
scoGEM.metabolites.get_by_id("4gglutbut_c").annotation["metanetx.chemical"] = "MNXM1378"
101101

102+
scoGEM.metabolites.get_by_id("xylan_e").annotation["kegg.compound"] = "C00707"
103+
104+
105+
102106

103107

104108
def annotate_germicidin_pathway(scoGEM):
@@ -116,6 +120,10 @@ def annotate_germicidin_pathway(scoGEM):
116120

117121

118122
def fix_metanetx_annotations(scoGEM, met_to_metanetx_fn):
123+
"""
124+
This csv file used for the mapping is created using the function
125+
'map_model_metabolites' in the map_to_metanetx.py script.
126+
"""
119127
df = pd.read_csv(met_to_metanetx_fn, index_col = 0)
120128
for i, row in df.iterrows():
121129
m_id = row[0]
@@ -135,6 +143,22 @@ def fix_metanetx_annotations(scoGEM, met_to_metanetx_fn):
135143
logging.info("Changed metanetx.chemical annotation of metabolite {0} from {1} to {2}".format(
136144
m.id, old_anno, m.annotation["metanetx.chemical"]))
137145

146+
def apply_new_chebi_annotations(scoGEM, chebi_annotation_fn):
    """
    Overwrite the chebi annotation of metabolites with the curated values.

    The csv file used for the mapping is created using the function
    'map_metabolites_to_chebi' in the map_to_metanetx.py script. That function
    stores python lists in the column "New chebi annotation", so the cells
    are read back as text such as "['CHEBI:15377']" and must be parsed
    before they are written to the model.

    Parameters
    ----------
    scoGEM
        Model whose ``metabolites.get_by_id`` returns objects with an
        ``id`` and an ``annotation`` dict.
    chebi_annotation_fn
        Path of a csv file with at least the columns "Met ID" and
        "New chebi annotation".

    Raises
    ------
    KeyError
        If a metabolite ID in the csv file is not in the model.
    ValueError
        If a list-like cell cannot be parsed.

    Rows without any new chebi ID (empty cell or empty list) leave the
    existing annotation of the metabolite untouched.
    """
    # Local import: the top-of-file import block of this script is not touched.
    import ast

    df = pd.read_csv(chebi_annotation_fn, index_col = None)
    for _, row in df.iterrows():
        m_id = row["Met ID"]
        # Look the metabolite up first so a stale csv file fails loudly.
        m = scoGEM.metabolites.get_by_id(m_id)

        new_annotation = row["New chebi annotation"]
        if isinstance(new_annotation, str):
            text = new_annotation.strip()
            if text.startswith("["):
                # The cell holds the repr of a python list; literal_eval only
                # accepts literals, so no code from the file is executed.
                try:
                    new_annotation = ast.literal_eval(text)
                except (ValueError, SyntaxError) as err:
                    raise ValueError("Cannot parse chebi annotation {0!r} of metabolite {1}".format(
                        text, m_id)) from err
            else:
                new_annotation = text
        elif pd.isna(new_annotation):
            # pandas represents an empty cell as NaN, never as None.
            new_annotation = None

        if not new_annotation:
            continue

        old_annotation = m.annotation.get("chebi")
        m.annotation["chebi"] = new_annotation
        logging.info("Changed chebi annotation of metabolite {0} from {1} to {2}".format(
            m.id, old_annotation, new_annotation))
138162

139163
def fix_wrong_chebi_mapping(scoGEM):
140164
for m in scoGEM.metabolites:
@@ -188,4 +212,5 @@ def fix_mmy_bug(scoGEM):
188212
if __name__ == '__main__':
189213
model = cobra.io.read_sbml_model("../../ModelFiles/xml/scoGEM.xml")
190214
# fix_annotations(model)
191-
fix_demand_biocyc_names(model)
215+
# fix_demand_biocyc_names(model)
216+
apply_new_chebi_annotations(model, "../../ComplementaryData/curation/chebi_annotation.csv")

ComplementaryScripts/consensusModel/map_to_metanetx.py

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import cobra
22
import pandas as pd
33
from pathlib import Path
4+
import libchebipy
45

56
def map_model_metabolites(model, metanetx_fn):
67
df = pd.read_csv(metanetx_fn, header = None, sep = "\t", comment = "#")
@@ -59,9 +60,82 @@ def map_model_metabolites(model, metanetx_fn):
5960
# print(new_df_list)
6061

6162
new_df = pd.DataFrame(new_df_list, columns = ["Met ID", "MNX 1", "MNX 2"])
62-
new_df.to_csv("metanetx_to_change.csv", index_label = "index")
63+
new_df.to_csv("../../ComplementaryData/curation/metanetx_to_change.csv", index_label = "index")
6364
# print(new_df)
6465

66+
def map_metabolites_to_chebi(scoGEM, metanetx_fn, save_fn = "../../ComplementaryData/curation/chebi_annotation.csv"):
    """
    Suggest chebi annotations for all metabolites based on their metanetx ID.

    For every metabolite with a "metanetx.chemical" annotation, the chebi IDs
    cross-referenced to that metanetx ID are collected and each one is
    replaced by the ID returned by libchebipy's ``get_parent_id`` when there
    is one, to avoid secondary IDs. The result is written to a csv file with
    one row per metabolite.

    Parameters
    ----------
    scoGEM
        Model whose ``metabolites`` are iterated; each needs an ``id`` and
        an ``annotation`` dict.
    metanetx_fn
        Tab-separated metanetx cross-reference file without header, read as
        the four columns "db:id", "metanetx", "reason" and "name". Lines
        starting with "#" are ignored.
    save_fn
        Path of the csv file to write. The default is relative to the
        working directory, as before.

    Metabolites without a metanetx annotation are reported on stdout and
    left out of the csv file.
    """
    df = pd.read_csv(metanetx_fn, header = None, sep = "\t", comment = "#")
    df.columns = ["db:id", "metanetx", "reason", "name"]

    # na = False keeps rows with a missing ID from breaking the boolean mask.
    chebi_df = df[df["db:id"].str.contains("chebi:", regex = False, na = False)]
    del df

    # Index the cross-references once instead of scanning the table for
    # every metabolite.
    chebis_by_mnx = {}
    for db_id, mnx_id in zip(chebi_df["db:id"], chebi_df["metanetx"]):
        chebis_by_mnx.setdefault(mnx_id, []).append(db_id.upper())
    del chebi_df

    # The same chebi ID shows up for many metabolites (e.g. one per
    # compartment), so every ID is only looked up once.
    parent_cache = {}

    new_df_list = []
    for m in scoGEM.metabolites:
        try:
            mnx_annot = m.annotation["metanetx.chemical"]
        except KeyError:
            print("No metanetx annotation for {0}, {1}".format(m.id, ["{0}:{1}".format(key, value) for key, value in m.annotation.items()]))
            continue

        chebi_ids = set()
        for mnx_i in as_list(mnx_annot):
            chebi_ids.update(chebis_by_mnx.get(mnx_i, []))

        parent_chebis = set()
        for chebi_id in chebi_ids:
            if chebi_id not in parent_cache:
                parent = libchebipy.ChebiEntity(chebi_id).get_parent_id()
                # No parent reported: keep the ID itself.
                parent_cache[chebi_id] = parent if parent else chebi_id
            parent_chebis.add(parent_cache[chebi_id])
        # Sorted so that re-running the script gives the same csv file.
        parent_chebis = sorted(parent_chebis)

        try:
            current_chebi_list = as_list(m.annotation["chebi"])
        except KeyError:
            current_chebi_list = [None]
            in_new_chebis = False
        else:
            # Compared against all cross-referenced IDs, including secondary
            # ones, as the column name in the csv file states.
            in_new_chebis = True
            for current_chebi in current_chebi_list:
                if current_chebi not in chebi_ids:
                    in_new_chebis = False
                    print("{2}: {0} is not in the new set {1}".format(current_chebi, parent_chebis, m.id))

        new_df_list.append([m.id, parent_chebis, current_chebi_list, in_new_chebis])
    new_df = pd.DataFrame(new_df_list, columns = ["Met ID", "New chebi annotation", "Current chebi annotation", "Old chebi in new (including secondary chebis)"])
    new_df.to_csv(save_fn, index = False)
113+
114+
def as_list(param):
    """
    Return *param* itself when it is a list, otherwise wrap it in a new
    one-element list.
    """
    return param if isinstance(param, list) else [param]
119+
120+
121+
# chebi_dict[m.id] = set(chebi_ids)
122+
# all_chebis += chebi_ids
123+
124+
# # Remove duplicates
125+
# all_chebis = list(set(all_chebis))
126+
# parent_child_dict = {}
127+
# for chebi_id in all_chebis:
128+
# lib_data = libchebipy.ChebiEntity(chebi_id.upper())
129+
# print(chebi_id.upper(), lib_data)
130+
# parent = lib_data.get_parent_id()
131+
# if parent is not None:
132+
# parent_child_dict[chebi_id.upper()] = parent
133+
134+
135+
# Use libchebipy to find parent IDs
136+
# a = libchebipy.ChebiEntity
137+
# a.get_parent_id()
138+
# a.
65139
def apply_metanetx_mapping(scoGEM, met_to_metanetx_fn):
66140
"""
67141
Depreceated: moved to fix_issue33_annotation_bugs.py
@@ -86,11 +160,17 @@ def apply_metanetx_mapping(scoGEM, met_to_metanetx_fn):
86160
m.id, old_anno, m.annotation["metanetx.chemical"]))
87161

88162

163+
164+
89165
if __name__ == '__main__':
90166
repo_path = Path(__file__).parent.parent.parent
91167
metanetx_fn = repo_path / "ComplementaryData" / "curation" / "metanetx_chem_xref.tsv"
92168
model_fn = repo_path / "ModelFiles" / "xml" / "scoGEM.xml"
93169
model = cobra.io.read_sbml_model(str(model_fn))
94170
# map_model_metabolites(model, metanetx_fn)
95171
fn = repo_path / "ComplementaryData" / "curation" /"metanetx_to_change.csv"
96-
apply_metanetx_mapping(model, fn)
172+
if 0:
173+
apply_metanetx_mapping(model, fn)
174+
175+
if 1:
176+
map_metabolites_to_chebi(model, metanetx_fn)

ComplementaryScripts/reconstruct_scoGEM.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
iAA1259_NEW_REACTIONS_FN = "../ComplementaryData/curation/iAA1259_suppl_S4.csv" # New reactions
4545

4646
MET_TO_METANETX_FN = str(REPO_DIR / "ComplementaryData" / "curation" /"metanetx_to_change.csv")
47+
MET_TO_CHEBI_FN = str(REPO_DIR / "ComplementaryData" / "curation" /"chebi_annotation.csv")
4748
NEW_BIOMASS_DATA_FN = "../ComplementaryData/biomass/biomass_scaled.txt"
4849

4950
def reconstruct_scoGEM(model_fn, save_fn = None):
@@ -87,6 +88,7 @@ def reconstruct_scoGEM(model_fn, save_fn = None):
8788
fix_SBO_terms.add_SBO(scoGEM)
8889
fix_issue33_annotation_bugs.fix_metanetx_annotations(scoGEM, MET_TO_METANETX_FN)
8990
fix_biomass.fix_biomass(scoGEM, NEW_BIOMASS_DATA_FN)
91+
fix_issue33_annotation_bugs.apply_new_chebi_annotations(scoGEM, MET_TO_CHEBI_FN)
9092

9193
# Save model
9294
export.export(scoGEM, formats = ["xml", "yml"])

0 commit comments

Comments
 (0)