9.BiGG_to_ecGEM
Batch construction of ecGEM from BiGG models
Download BiGG Model
!mkdir ./data/BIGG_model
# Ignored models
wget http://bigg.ucsd.edu/static/models/iAB_RBC_283.json#Homo sapiens
wget http://bigg.ucsd.edu/static/models/iAT_PLT_636.json#Homo sapiens
wget http://bigg.ucsd.edu/static/models/RECON1.json#Homo sapiens
wget http://bigg.ucsd.edu/static/models/Recon3D.json#Homo sapiens
wget http://bigg.ucsd.edu/static/models/iMM1415.json#Mus musculus
wget http://bigg.ucsd.edu/static/models/e_coli_core.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iAF1260b.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iEC1356_Bl21DE3.json#Escherichia coli BL21(DE3)
wget http://bigg.ucsd.edu/static/models/iECD_1391.json# Escherichia coli BL21(DE3)
wget http://bigg.ucsd.edu/static/models/iECDH1ME8569_1439.json#Escherichia coli DH1
wget http://bigg.ucsd.edu/static/models/iAF1260.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iAPECO1_1312.json#Escherichia coli APEC O1
wget http://bigg.ucsd.edu/static/models/iBWG_1329.json#Escherichia coli BW2952
wget http://bigg.ucsd.edu/static/models/ic_1306.json#Escherichia coli CFT073
wget http://bigg.ucsd.edu/static/models/iE2348C_1286.json#Escherichia coli O127:H6 str. E2348/69
wget http://bigg.ucsd.edu/static/models/iEC042_1314.json#Escherichia coli 042
wget http://bigg.ucsd.edu/static/models/iEC1344_C.json#Escherichia coli C
wget http://bigg.ucsd.edu/static/models/iEC1349_Crooks.json#Escherichia coli ATCC 8739
wget http://bigg.ucsd.edu/static/models/iEC1364_W.json#Escherichia coli W
wget http://bigg.ucsd.edu/static/models/iEC1368_DH5a.json#Escherichia coli DH5[alpha]
wget http://bigg.ucsd.edu/static/models/iB21_1397.json#Escherichia coli BL21(DE3)
wget http://bigg.ucsd.edu/static/models/iEC55989_1330.json#Escherichia coli 55989
wget http://bigg.ucsd.edu/static/models/iECABU_c1320.json#Escherichia coli ABU 83972
wget http://bigg.ucsd.edu/static/models/iECB_1328.json#Escherichia coli B str. REL606
wget http://bigg.ucsd.edu/static/models/iECBD_1354.json#Escherichia coli 'BL21-Gold(DE3)pLysS AG'
wget http://bigg.ucsd.edu/static/models/iECDH10B_1368.json#Escherichia coli str. K-12 substr. DH10B
wget http://bigg.ucsd.edu/static/models/iEcDH1_1363.json#Escherichia coli DH1
wget http://bigg.ucsd.edu/static/models/iEcE24377_1341.json#Escherichia coli O139:H28 str. E24377A
wget http://bigg.ucsd.edu/static/models/iECED1_1282.json#Escherichia coli ED1a
wget http://bigg.ucsd.edu/static/models/iECH74115_1262.json#Escherichia coli O157:H7 str. EC4115
wget http://bigg.ucsd.edu/static/models/iEcHS_1320.json#Escherichia coli HS
wget http://bigg.ucsd.edu/static/models/iECIAI1_1343.json#Escherichia coli IAI1
wget http://bigg.ucsd.edu/static/models/iECIAI39_1322.json#Escherichia coli IAI39
wget http://bigg.ucsd.edu/static/models/iECNA114_1301.json#Escherichia coli NA114
wget http://bigg.ucsd.edu/static/models/iECO103_1326.json#Escherichia coli O103:H2 str. 12009
wget http://bigg.ucsd.edu/static/models/iECO111_1330.json#Escherichia coli O111:H- str. 11128
wget http://bigg.ucsd.edu/static/models/iECO26_1355.json#Escherichia coli O26:H11 str. 11368
wget http://bigg.ucsd.edu/static/models/iECOK1_1307.json#Escherichia coli IHE3034
wget http://bigg.ucsd.edu/static/models/iEcolC_1368.json#Escherichia coli ATCC 8739
wget http://bigg.ucsd.edu/static/models/iECP_1309.json#Escherichia coli 536
wget http://bigg.ucsd.edu/static/models/iECs_1301.json#Escherichia coli O157:H7 str. Sakai
wget http://bigg.ucsd.edu/static/models/iECS88_1305.json#Escherichia coli S88
wget http://bigg.ucsd.edu/static/models/iECSE_1348.json#Escherichia coli SE11
wget http://bigg.ucsd.edu/static/models/iECSF_1327.json#Escherichia coli SE15
wget http://bigg.ucsd.edu/static/models/iEcSMS35_1347.json#Escherichia coli SMS-3-5
wget http://bigg.ucsd.edu/static/models/iECSP_1301.json#Escherichia coli O157:H7 str. TW14359
wget http://bigg.ucsd.edu/static/models/iECUMN_1333.json#Escherichia coli UMN026
wget http://bigg.ucsd.edu/static/models/iECW_1372.json#Escherichia coli W
wget http://bigg.ucsd.edu/static/models/iEKO11_1354.json# Escherichia coli KO11FL
wget http://bigg.ucsd.edu/static/models/iETEC_1333.json#Escherichia coli ETEC H10407
wget http://bigg.ucsd.edu/static/models/iG2583_1286.json# Escherichia coli O55:H7 str. CB9615
wget http://bigg.ucsd.edu/static/models/iJO1366.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iJR904.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iLF82_1304.json#Escherichia coli LF82
wget http://bigg.ucsd.edu/static/models/iML1515.json#Escherichia coli str. K-12 substr. MG1655
wget http://bigg.ucsd.edu/static/models/iUMN146_1321.json#Escherichia coli UM146
wget http://bigg.ucsd.edu/static/models/iUMNK88_1353.json#Escherichia coli UMNK88
wget http://bigg.ucsd.edu/static/models/iUTI89_1310.json#Escherichia coli UTI89
wget http://bigg.ucsd.edu/static/models/iWFL_1372.json#Escherichia coli W
wget http://bigg.ucsd.edu/static/models/iY75_1357.json# Escherichia coli str. K-12 substr. W3110
wget http://bigg.ucsd.edu/static/models/iZ_1308.json# Escherichia coli O157:H7 str. EDL933
wget http://bigg.ucsd.edu/static/models/iNRG857_1313.json#Escherichia coli O83:H1 str. NRG 857C
wget http://bigg.ucsd.edu/static/models/iEC1372_W3110.json#Escherichia coli str. K-12 substr. W3110
wget http://bigg.ucsd.edu/static/models/iIS312_Amastigote.json#Trypanosoma cruzi Dm28c
wget http://bigg.ucsd.edu/static/models/iIS312_Epimastigote.json#Trypanosoma cruzi Dm28c
wget http://bigg.ucsd.edu/static/models/iIS312_Trypomastigote.json#Trypanosoma cruzi Dm28c
wget http://bigg.ucsd.edu/static/models/iAM_Pb448.json#Plasmodium berghei
wget http://bigg.ucsd.edu/static/models/iAM_Pc455.json#Plasmodium cynomolgi strain B
wget http://bigg.ucsd.edu/static/models/iAM_Pk459.json#Plasmodium knowlesi strain H
wget http://bigg.ucsd.edu/static/models/iAM_Pv461.json#Plasmodium vivax Sal-1
wget http://bigg.ucsd.edu/static/models/iS_1188.json#Shigella flexneri 2a str. 2457T
wget http://bigg.ucsd.edu/static/models/iSbBS512_1146.json#Shigella boydii CDC 3083-94
wget http://bigg.ucsd.edu/static/models/iSBO_1134.json#Shigella boydii Sb227
wget http://bigg.ucsd.edu/static/models/iSDY_1059.json#Shigella dysenteriae Sd197
wget http://bigg.ucsd.edu/static/models/iSF_1195.json#Shigella flexneri 2a str. 301
wget http://bigg.ucsd.edu/static/models/iSFV_1184.json#Shigella flexneri 5 str. 8401
wget http://bigg.ucsd.edu/static/models/iSFxv_1172.json#Shigella flexneri 2002017
wget http://bigg.ucsd.edu/static/models/iYS1720.json#Salmonella pan-reactome
wget http://bigg.ucsd.edu/static/models/iNJ661.json#Mycobacterium tuberculosis H37Rv
wget http://bigg.ucsd.edu/static/models/iJN678.json#Synechocystis sp. PCC 6803
wget http://bigg.ucsd.edu/static/models/iSB619.json#Staphylococcus aureus subsp. aureus N315
wget http://bigg.ucsd.edu/static/models/iCHOv1_DG44.json#Cricetulus griseus
wget http://bigg.ucsd.edu/static/models/iND750.json#Saccharomyces cerevisiae S288C
wget http://bigg.ucsd.edu/static/models/iJN746.json#Pseudomonas putida KT2440
# Models used to construct ecGEMs
wget http://bigg.ucsd.edu/static/models/iAF692.json#Methanosarcina barkeri str. Fusaro
wget http://bigg.ucsd.edu/static/models/iAF987.json#Geobacter metallireducens GS-15
wget http://bigg.ucsd.edu/static/models/iAM_Pf480.json#Plasmodium falciparum 3D7
wget http://bigg.ucsd.edu/static/models/iCHOv1.json#Cricetulus griseus — cannot find UniProt information
wget http://bigg.ucsd.edu/static/models/iCN718.json#Acinetobacter baumannii AYE — cannot find UniProt information
wget http://bigg.ucsd.edu/static/models/iCN900.json#Clostridioides difficile 630
wget http://bigg.ucsd.edu/static/models/iEK1008.json#Mycobacterium tuberculosis H37Rv
wget http://bigg.ucsd.edu/static/models/iHN637.json#Clostridium ljungdahlii DSM 13528
wget http://bigg.ucsd.edu/static/models/iIS312.json#Trypanosoma cruzi Dm28c
wget http://bigg.ucsd.edu/static/models/iIT341.json#Helicobacter pylori 26695 — insufficient UniProt information
wget http://bigg.ucsd.edu/static/models/iJB785.json#Synechococcus elongatus PCC 7942
wget http://bigg.ucsd.edu/static/models/iJN1463.json#Pseudomonas putida KT2440
wget http://bigg.ucsd.edu/static/models/iLB1027_lipid.json# Phaeodactylum tricornutum CCAP 1055/1
wget http://bigg.ucsd.edu/static/models/iLJ478.json#Thermotoga maritima MSB8
wget http://bigg.ucsd.edu/static/models/iMM904.json#Saccharomyces cerevisiae S288C
wget http://bigg.ucsd.edu/static/models/iNF517.json#Lactococcus lactis subsp. cremoris MG1363
wget http://bigg.ucsd.edu/static/models/iPC815.json#Yersinia pestis CO92
wget http://bigg.ucsd.edu/static/models/iRC1080.json#Chlamydomonas reinhardtii — cannot find UniProt information
wget http://bigg.ucsd.edu/static/models/iSSON_1240.json#Shigella sonnei Ss046
wget http://bigg.ucsd.edu/static/models/iSynCJ816.json#Synechocystis sp. PCC 6803
wget http://bigg.ucsd.edu/static/models/iYL1228.json#Klebsiella pneumoniae subsp. pneumoniae MGH 78578
wget http://bigg.ucsd.edu/static/models/iYO844.json#Bacillus subtilis subsp. subtilis str. 168
wget http://bigg.ucsd.edu/static/models/STM_v1_0.json#Salmonella enterica subsp. enterica serovar Typhimurium str. LT2
wget http://bigg.ucsd.edu/static/models/iYS854.json#Staphylococcus aureus subsp. aureus USA300_TCH1516 — cannot find UniProt information
Add UniProt IDs to the models
import glob
import subprocess
import cobra
import re
import sys
sys.path.append(r'./script/')
from uniprot_id_mapping import *
# Add one UniProt accession per gene to every BiGG model found in
# BIGG_MODEL_DIR and save the result as '<model>_uniprot.json'.
#
# Models that already carry 'uniprot' gene annotations are only normalised
# (list -> single accession).  For all other models the gene IDs are sent to
# the UniProt ID-mapping helpers imported from uniprot_id_mapping.
BIGG_MODEL_DIR = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/BIGG_model/'

# Models whose gene IDs are replaced by their 'original_bigg_ids' note
# (with '-' turned into '_') before the mapping is requested.
RENAMED_GENE_MODELS = ('iNF517', 'iJB785', 'iSynCJ816', 'iHN637', 'iLB1027_lipid')


def is_derived_model_file(path):
    """Return True if *path* is an output of this script, not a BiGG download.

    The script writes '<model>_uniprot.json' and '<model>_change.json' into
    the same folder it globs, so these must be skipped when it is re-run.
    """
    return path.endswith(('_uniprot.json', '_change.json'))


def model_name_from_path(path):
    """Return the file name of *path* without its '.json' / '.xml' extension."""
    file_name = path.split('/')[-1]
    for extension in ('.json', '.xml'):
        if file_name.endswith(extension):
            return file_name[:-len(extension)]
    return file_name


def first_annotation_value(value):
    """Return a single accession from an annotation given as list or scalar.

    Indexing a plain string with [0] would keep only its first character,
    so scalars are returned unchanged.
    """
    if isinstance(value, (list, tuple)):
        return value[0]
    return value


def find_gene(model, gene_id):
    """Return the gene called *gene_id* in *model*, or None if it is absent.

    The ID is tried verbatim first and then with every '_' removed (the
    original notebook marks this fallback with the note 'iLJ478').
    """
    for candidate in (gene_id, gene_id.replace('_', '')):
        try:
            return model.genes.get_by_id(candidate)
        except KeyError:
            continue
    return None


files = glob.glob(BIGG_MODEL_DIR + '*.json')
for eachf in files:
    if is_derived_model_file(eachf):
        # Written by a previous run of this loop; not an input model.
        continue
    print(eachf)

    if eachf.endswith('.xml'):
        model = cobra.io.read_sbml_model(eachf)
    elif eachf.endswith('.json'):
        model = cobra.io.json.load_json_model(eachf)
    else:
        continue
    model_name = model_name_from_path(eachf)
    rename_genes = model_name in RENAMED_GENE_MODELS

    model_has_uniprot = False
    submit_ids = set()  # gene identifiers to send to the ID-mapping service
    for eachg in model.genes:
        annotation = getattr(eachg, 'annotation', None)
        if annotation is None:
            print('Model does not have annotation!')
            submit_ids.add(eachg.id)
            continue

        uniprot = annotation.get('uniprot')
        if uniprot:
            annotation['uniprot'] = first_annotation_value(uniprot)
            model_has_uniprot = True
            continue

        try:
            original_id = eachg.notes['original_bigg_ids'][0].replace('-', '_')
        except (KeyError, IndexError, TypeError, AttributeError):
            print('Model can not find UniProt ID!')
            continue

        if model_name == 'iYO844':
            # For this model the current gene ID is submitted, not the note.
            submit_ids.add(eachg.id)
            continue
        submit_ids.add(original_id)
        if rename_genes:
            eachg.id = original_id

    if rename_genes:
        # Round-trip through JSON so the model is reloaded with the new gene IDs.
        json_file_path_tmp = BIGG_MODEL_DIR + '%s_change.json' % model_name
        cobra.io.save_json_model(model, json_file_path_tmp)
        model = cobra.io.json.load_json_model(json_file_path_tmp)

    if not model_has_uniprot:
        if not submit_ids:
            print('No gene IDs to map for %s!' % model_name)
            continue
        # gene to UniProtKB
        # NOTE(review): the request carries no organism filter, so a gene name
        # may map to entries of several organisms; the last hit wins below.
        job_id = submit_id_mapping(from_db="Gene_Name", to_db="UniProtKB", ids=sorted(submit_ids))
        if not check_id_mapping_results_ready(job_id):
            print('UniProt ID mapping results not ready for %s!' % model_name)
            continue
        link = get_id_mapping_results_link(job_id)
        results = get_id_mapping_results_search(link)
        for eachr in results['results']:
            try:
                accession = eachr['to']['primaryAccession']
            except (KeyError, TypeError):
                # Report the unusable hit without touching the missing key again.
                print(eachr.get('from'), eachr.get('to'))
                continue
            gene = find_gene(model, eachr['from'])
            if gene is not None:
                gene.annotation['uniprot'] = accession

    json_file_path = BIGG_MODEL_DIR + model_name + "_uniprot.json"
    cobra.io.save_json_model(model, json_file_path)
# iMM904_uniprot needs manual correction of its GPR rules afterwards.
/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/BIGG_model/iYO844.json
Retrying in 3s
Fetched: 500 / 842
Fetched: 842 / 842
# Organism name for each UniProt-annotated BiGG model.  Keys are
# '<BiGG model ID>_uniprot', i.e. the '*_uniprot.json' file names without
# their extension.
model_dict = {
    bigg_id + '_uniprot': organism
    for bigg_id, organism in (
        ('iAF692', 'Methanosarcina barkeri'),
        ('iAF987', 'Geobacter metallireducens'),
        ('iAM_Pf480', 'Plasmodium falciparum'),
        ('iCN900', 'Clostridioides difficile'),
        ('iEK1008', 'Mycobacterium tuberculosis'),
        ('iHN637', 'Clostridium ljungdahlii'),
        ('iIS312', 'Trypanosoma cruzi'),
        ('iJB785', 'Synechococcus elongatus'),
        ('iJN1463', 'Pseudomonas putida'),
        ('iLB1027_lipid', 'Phaeodactylum tricornutum'),
        ('iLJ478', 'Thermotoga maritima'),
        ('iMM904', 'Saccharomyces cerevisiae'),
        ('iNF517', 'Lactococcus lactis'),
        ('iPC815', 'Yersinia pestis'),
        ('iSSON_1240', 'Shigella sonnei'),
        ('iSynCJ816', 'Synechocystis sp.'),
        ('iYL1228', 'Klebsiella pneumoniae'),
        ('iYO844', 'Bacillus subtilis'),
        ('STM_v1_0', 'Salmonella enterica'),
    )
}
import glob
import subprocess
import re
import datetime
# Build two ecGEMs for every '*_uniprot.json' BiGG model by running
# get_ecGEM_onestop.py twice: once with kcat_method 'AutoPACMEN' and once
# with kcat_method 'DLKcat'.
files = glob.glob('/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/BIGG_model/*_uniprot.json')#_uniprot
bigg_models_metabolites_file = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/bigg_models_metabolites.txt'
brenda_file = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/brenda_2023_1.txt'
uniprot_file = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/data/uniprot_data_accession_key.json'
onestop_script = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/script/get_ecGEM_onestop.py'


def build_ecgem_command(model_file, kcat_method, work_folder, ecGEM_file, org_name=None):
    """Return the get_ecGEM_onestop.py command line as an argument list.

    Args:
        model_file: path of the UniProt-annotated model passed as -m.
        kcat_method: 'AutoPACMEN' or 'DLKcat'.
        work_folder: folder passed as -work_folder.
        ecGEM_file: output path passed as -ecGEM.
        org_name: organism name passed as -org (AutoPACMEN only).

    An argument list is used instead of a shell string so that organism
    names and paths containing spaces or quotes need no manual quoting.
    """
    uses_autopacmen = kcat_method == 'AutoPACMEN'
    cmd = ['python', onestop_script,
           '-m', model_file,
           '-kcat', 'No',
           '-f', '0.45',
           '-bigg', bigg_models_metabolites_file]
    if uses_autopacmen:
        cmd += ['-org', org_name]
    cmd += ['-sigma', '0.5',
            '-ptot', '0.56',
            '-kcat_method', kcat_method,
            '-work_folder', work_folder]
    if uses_autopacmen:
        # Only the AutoPACMEN run is given the BRENDA/UniProt data files
        # and the gap-fill options.
        cmd += ['-brenda', brenda_file,
                '-uniprot', uniprot_file,
                '-kcat_gap_fill', 'mean',
                '-r_gap_fill', 'mean']
    cmd += ['-ecGEM', ecGEM_file]
    return cmd


def run_ecgem_build(cmd, label, model_name):
    """Run *cmd*, report a failure for *model_name*, and print the elapsed time.

    check=True makes a non-zero exit status raise, so a failed build is
    actually reported instead of passing silently.
    """
    starttime = datetime.datetime.now()
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as err:
        print('Can not construct %s ecGEM for %s!' % (label, model_name))
        print(err)
    endtime = datetime.datetime.now()
    print(endtime - starttime)


for eachf in files:
    print(eachf)
    file_name = eachf.split('/')[-1]
    if file_name.endswith('.json'):
        model_name = file_name[:-len('.json')]
    elif file_name.endswith('.xml'):
        model_name = file_name[:-len('.xml')]
    else:
        continue

    if model_name not in model_dict:
        # Without an organism name the AutoPACMEN run cannot be configured;
        # skip this model instead of aborting the whole batch.
        print('No organism name in model_dict for %s, skipped!' % model_name)
        continue
    org_name = model_dict[model_name]
    work_folder = '/hpcfs/fproject/mao_zt/MCModel/ECMpy/analysis/BiGG/get_kcat_mw_for_%s' % model_name

    # AutoPACMEN
    ecGEM_file = './model/BiGG/ec%s_AutoPACMEN.json' % model_name
    run_ecgem_build(
        build_ecgem_command(eachf, 'AutoPACMEN', work_folder, ecGEM_file, org_name),
        'AutoPACMAN', model_name)

    # DLKcat
    ecGEM_file = './model/BiGG/ec%s_DLKcat.json' % model_name
    run_ecgem_build(
        build_ecgem_command(eachf, 'DLKcat', work_folder, ecGEM_file),
        'DLKcat', model_name)