Skip to content

Commit 2110d15

Browse files
committed
v0.1.9
1 parent 7245f89 commit 2110d15

File tree

4 files changed

+82
-36
lines changed

4 files changed

+82
-36
lines changed

README.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
Coder-friendly codon usage tables for various organisms, in CSV format
2-
----------------------------------------------------------------------
1+
Codon usage tables for various organisms, in CSV format
2+
--------------------------------------------------------
33

4-
This repository contains simple CSV files (see [``data/tables/``](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/data/tables)) of the codon usage of various organisms,
4+
This repository contains simple CSV files (in [``data/tables/``](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/data/tables)) of the codon usage of various organisms,
55
meant to be used by codon optimization software. All files are of the form
66

77
```
@@ -16,6 +16,8 @@ K,AAG,0.24
1616
etc.
1717
```
1818

19+
It also contains a script to download new codon usage tables, using a TaxID to identify organisms.
20+
1921
The data comes from [http://www.kazusa.or.jp](http://www.kazusa.or.jp) (they computed the codon usages from NCBI sequence data).
2022

2123
More information is available [here](http://www.kazusa.or.jp/codon/readme_codon.html
@@ -27,15 +29,15 @@ status for the year 2000.
2729
Nakamura, Y., Gojobori, T. and Ikemura, T. (2000) Nucl. Acids Res. 28, 292.
2830
```
2931

30-
Language bindings
31-
-----------------
32+
Python bindings
33+
---------------
34+
35+
To get these tables from Python, use the [python_codon_tables](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/python_codon_tables) package.
3236

33-
This repositories also hosts the [python_codon_tables](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/python_codon_tables) package, which allows to use these tables from Python and download any new tables from Kazusa using taxonomic IDs.
3437

35-
If you need these tables in another language, and you see a way to turn the repository into a package for that language, please submit a PR.
3638

3739
Contribute
3840
----------
3941

40-
This repo was started at the Edinburgh Genome Foundry by [Zulko](https://github.com/Zulko) and is released
42+
This repo was started at the Edinburgh Genome Foundry by Zulko and is released
4143
on [Github](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables) under a Public Domain licence (and no warranty whatsoever, please cross-check the codon usage with other sources if you are not sure). Feel free to add other tables if you think of more commonly used species.

codon_usage_data/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.1.8
1+
0.1.9

python_codon_tables/README.rst

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,9 @@ Usage
5353
codons_tables = pct.get_all_available_codons_tables()
5454
print (codons_tables['c_elegans_6239']['L']['CTA']) # returns 0.09
5555
56-
- Notice that by default the tables use nucleotide T instead of U. Using
57-
``get_codons_table('e_coli', replace_U_by_T=False)`` will leave Us as Us.
56+
- Note that by default the tables use nucleotide T instead of U. Using ``get_codons_table('e_coli', replace_U_by_T=False)`` will leave Us as Us.
5857

59-
- In ``get_codons_table`` you can also provide a "shorthand" notation
60-
``b_subtilis``, which will be automatically extended to ``b_subtilis_1423`` as
61-
it appears so in the built-in table (use this feature at your own risks!)
58+
- In ``get_codons_table`` you can also provide a "shorthand" notation ``b_subtilis``, which will be automatically extended to ``b_subtilis_1423`` as it appears in the built-in table (use this feature at your own risk!)
6259

6360
Contribute
6461
----------
@@ -79,3 +76,11 @@ Manual:
7976
.. code:: bash
8077
8178
(sudo) python setup.py install
79+
80+
More biology software
81+
-----------------------
82+
83+
.. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png
84+
:target: https://edinburgh-genome-foundry.github.io/
85+
86+
This library is part of the `EGF Codons <https://edinburgh-genome-foundry.github.io/>`_ synthetic biology software suite for DNA design, manufacturing and validation.

python_codon_tables/python_codon_tables.py

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,44 +3,48 @@
33
import os
44
from functools import lru_cache
55

6-
if (sys.version_info[0] == 3):
6+
if sys.version_info[0] == 3:
77
import urllib.request
8+
89
urlopen = urllib.request.urlopen
910
else:
1011
import urllib2
12+
1113
urlopen = urllib2.urlopen
1214

1315
_this_dir = os.path.dirname(os.path.realpath(__file__))
14-
_tables_dir = os.path.join(_this_dir, '..', "codon_usage_data", "tables")
16+
_tables_dir = os.path.join(_this_dir, "..", "codon_usage_data", "tables")
1517

1618
available_codon_tables_names = [
17-
filename[:-4] for filename in os.listdir(_tables_dir)]
19+
filename[:-4] for filename in os.listdir(_tables_dir)
20+
]
1821

1922
available_codon_tables_shortnames = {
20-
"_".join(table_name.split('_')[:-1]): table_name
23+
"_".join(table_name.split("_")[:-1]): table_name
2124
for table_name in available_codon_tables_names
2225
}
2326

27+
2428
def csv_string_to_codons_dict(csv_string):
2529
"""Transform a CSV string of a codon table to a dict."""
2630
result = {}
2731
for line in csv_string.split("\n")[1:]:
28-
aa, codon, freq = line.split(',')
32+
aa, codon, freq = line.split(",")
2933
if aa not in result:
3034
result[aa] = {}
3135
result[aa][codon] = float(freq)
3236
return result
3337

38+
3439
def table_with_U_replaced_by_T(table):
3540
return {
36-
aa: {
37-
codon.replace('U', 'T'): freq
38-
for codon, freq in aa_data.items()
39-
}
41+
aa: {codon.replace("U", "T"): freq for codon, freq in aa_data.items()}
4042
for aa, aa_data in table.items()
4143
}
44+
45+
4246
@lru_cache(maxsize=128)
43-
def get_codons_table(table_name, replace_U_by_T=True):
47+
def get_codons_table(table_name, replace_U_by_T=True, web_timeout=5):
4448
"""Get data from one of this package's builtin codon usage tables.
4549
4650
The ``table_name`` argument is very flexible on purpose: it can be either an
@@ -50,42 +54,77 @@ def get_codons_table(table_name, replace_U_by_T=True):
5054
or a short form "e_coli" which will be automatically extended to
5155
"e_coli_316407" (at your own risks).
5256
57+
If a taxonomic ID is provided and no table with this taxID is present in
58+
the ``codon_usage_data/tables/`` folder, the table will be downloaded from
59+
the http://www.kazusa.or.jp/codon website. As this website sometimes goes
60+
down, the parameter ``web_timeout`` controls how long to wait before a
61+
Python exception is raised, informing the user that Kazusa may be down.
62+
5363
The ``replace_U_by_T`` argument will replace all codons names from UAA to
5464
TAA etc.
5565
5666
Returns a dict {"*": {'TAA': 0.64...}, 'K': {'AAA': 0.76...}, ...}
67+
68+
5769
"""
5870
if replace_U_by_T:
59-
table = get_codons_table(table_name, replace_U_by_T=False)
71+
table = get_codons_table(
72+
table_name, replace_U_by_T=False, web_timeout=5
73+
)
6074
return table_with_U_replaced_by_T(table)
6175
if isinstance(table_name, int) or str.isdigit(table_name):
62-
return download_codons_table(taxid=table_name)
76+
return download_codons_table(taxid=table_name, timeout=web_timeout)
6377
if table_name in available_codon_tables_shortnames:
6478
table_name = available_codon_tables_shortnames[table_name]
65-
with open(os.path.join(_tables_dir, table_name + '.csv'), 'r') as f:
79+
with open(os.path.join(_tables_dir, table_name + ".csv"), "r") as f:
6680
return csv_string_to_codons_dict(f.read())
6781

82+
6883
def get_all_available_codons_tables(replace_U_by_T=True):
6984
"""Get all data from all of this package's builtin codon usage tables."""
7085
return {
7186
table_name: get_codons_table(table_name, replace_U_by_T=replace_U_by_T)
7287
for table_name in available_codon_tables_names
7388
}
7489

90+
7591
@lru_cache(maxsize=128)
76-
def download_codons_table(taxid=316407, target_file=None):
92+
def download_codons_table(taxid=316407, target_file=None, timeout=5):
7793
"""Get all data from all of this package's builtin codon usage tables."""
78-
_kazusa_url = ("http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi"
79-
"?aa=1&style=N&species=%s")
94+
_kazusa_url = (
95+
"http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi"
96+
"?aa=1&style=N&species=%s"
97+
)
8098
_codon_regexpr = r"([ATGCU]{3}) ([A-Z]|\*) (\d.\d+)"
8199
url = _kazusa_url % taxid
82-
html_content = urlopen(url).read().decode().replace("\n", " ")
83-
csv_data = "\n".join(["amino_acid,codon,relative_frequency"] + sorted([
84-
"%s,%s,%s" % (aa, codon, usage)
85-
for codon, aa, usage in re.findall(_codon_regexpr, html_content)
86-
]))
100+
try:
101+
web_handle = urlopen(url, timeout=timeout)
102+
except Exception as err:
103+
if "timed out" in str(err):
104+
raise RuntimeError(
105+
(
106+
"connexion to %s timed out after %d seconds. Maybe "
107+
"their website is down?"
108+
)
109+
% (url, timeout)
110+
)
111+
else:
112+
raise err
113+
114+
html_content = web_handle.read().decode().replace("\n", " ")
115+
csv_data = "\n".join(
116+
["amino_acid,codon,relative_frequency"]
117+
+ sorted(
118+
[
119+
"%s,%s,%s" % (aa, codon, usage)
120+
for codon, aa, usage in re.findall(
121+
_codon_regexpr, html_content
122+
)
123+
]
124+
)
125+
)
87126
if target_file is not None:
88127
with open(target_file, "w+") as f:
89128
f.write(csv_data)
90129
else:
91-
return csv_string_to_codons_dict(csv_data)
130+
return csv_string_to_codons_dict(csv_data)

0 commit comments

Comments
 (0)