v0.1.9

veghp · veghp · commit 2110d15dfcf1 · 2020-09-06T13:34:32.000+01:00
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
-Coder-friendly codon usage tables for various organisms, in CSV format
-----------------------------------------------------------------------
+Codon usage tables for various organisms, in CSV format
+--------------------------------------------------------
 
-This repository contains simple CSV files (see [``data/tables/``](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/data/tables)) of the codon usage of various organisms,
+This repository contains simple CSV files (in [``data/tables/``](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/data/tables)) of the codon usage of various organisms,
 meant to be used by codon optimization software. All files in are of the form
 
 ```
@@ -16,6 +16,8 @@ K,AAG,0.24
 etc.
 ```
 
+It also contains a script to download new codon usage tables, using a TaxID to identify organisms.
+
 The data comes from [http://www.kazusa.or.jp](http://www.kazusa.or.jp) (they computed the codon usages from NCBI sequence data).
 
 More informations are available [here](http://www.kazusa.or.jp/codon/readme_codon.html
@@ -27,15 +29,15 @@ status for the year 2000.
 Nakamura, Y., Gojobori, T. and Ikemura, T. (2000) Nucl. Acids Res. 28, 292.
 ```
 
-Language bindings
------------------
+Python bindings
+---------------
+
+To get these tables from Python, use the [python_codon_tables](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/python_codon_tables) package.
 
-This repositories also hosts the [python_codon_tables](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables/tree/master/python_codon_tables) package, which allows to use these tables from Python and download any new tables from Kazusa using taxonomic IDs.
 
-If you need these tables in another language, and you see a way to turn the repository into a package for that language, please submit a PR.
 
 Contribute
 ----------
 
-This repo was started at the Edinburgh Genome Foundry by [Zulko](https://github.com/Zulko) and is released
+This repo was started at the Edinburgh Genome Foundry by Zulko and is released
 on [Github](https://github.com/Edinburgh-Genome-Foundry/codon-usage-tables) under a Public Domain licence (and no warranty whatsoever, please cross-check the codon usage with other sources if you are not sure). Feel free to add other tables if you think of more commonly used species.
diff --git a/codon_usage_data/version.txt b/codon_usage_data/version.txt
@@ -1 +1 @@
-0.1.8
+0.1.9
diff --git a/python_codon_tables/README.rst b/python_codon_tables/README.rst
@@ -53,12 +53,9 @@ Usage
     codons_tables = pct.get_all_available_codons_tables()
     print (codons_tables['c_elegans_6239']['L']['CTA'])  # returns 0.09
 
-- Notice that by default the tables use nucleotide T instead of U. Using
-  ``get_codons_table('e_coli', replace_U_by_T=False)`` will leave Us as Us.
+- Notice that by default the tables use nucleotide T instead of U. Using ``get_codons_table('e_coli', replace_U_by_T=False)`` will leave Us as Us.
 
-- In ``get_codons_table`` you can also provide a "shorthand" notation
-``b_subtilis``, which will be automatically extended to ``b_subtilis_1423`` as
-it appears so in the built-in table (use this feature at your own risks!)
+- In ``get_codons_table`` you can also provide a "shorthand" notation ``b_subtilis``, which will be automatically extended to ``b_subtilis_1423``, the name under which it appears in the built-in tables (use this feature at your own risk!)
 
 Contribute
 ----------
@@ -79,3 +76,11 @@ Manual:
 .. code:: bash
 
     (sudo) python setup.py install
+
+More biology software
+-----------------------
+
+.. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png
+  :target: https://edinburgh-genome-foundry.github.io/
+
+This library is part of the `EGF Codons <https://edinburgh-genome-foundry.github.io/>`_ synthetic biology software suite for DNA design, manufacturing and validation.
diff --git a/python_codon_tables/python_codon_tables.py b/python_codon_tables/python_codon_tables.py
@@ -3,44 +3,48 @@
 import os
 from functools import lru_cache
 
-if (sys.version_info[0] == 3):
+if sys.version_info[0] == 3:
     import urllib.request
+
     urlopen = urllib.request.urlopen
 else:
     import urllib2
+
     urlopen = urllib2.urlopen
 
 _this_dir = os.path.dirname(os.path.realpath(__file__))
-_tables_dir = os.path.join(_this_dir, '..', "codon_usage_data", "tables")
+_tables_dir = os.path.join(_this_dir, "..", "codon_usage_data", "tables")
 
 available_codon_tables_names = [
-    filename[:-4] for filename in os.listdir(_tables_dir)]
+    filename[:-4] for filename in os.listdir(_tables_dir)
+]
 
 available_codon_tables_shortnames = {
-    "_".join(table_name.split('_')[:-1]): table_name
+    "_".join(table_name.split("_")[:-1]): table_name
     for table_name in available_codon_tables_names
 }
 
+
 def csv_string_to_codons_dict(csv_string):
     """Transform a CSV string of a codon table to a dict."""
     result = {}
     for line in csv_string.split("\n")[1:]:
-        aa, codon, freq = line.split(',')
+        aa, codon, freq = line.split(",")
         if aa not in result:
             result[aa] = {}
         result[aa][codon] = float(freq)
     return result
 
+
 def table_with_U_replaced_by_T(table):
     return {
-        aa: {
-            codon.replace('U', 'T'): freq
-            for codon, freq in aa_data.items()
-        }
+        aa: {codon.replace("U", "T"): freq for codon, freq in aa_data.items()}
         for aa, aa_data in table.items()
     }
+
+
 @lru_cache(maxsize=128)
-def get_codons_table(table_name, replace_U_by_T=True):
+def get_codons_table(table_name, replace_U_by_T=True, web_timeout=5):
     """Get data from one of this package's builtin codon usage tables.
 
     The ``table_name`` argument very flexible on purpose, it can be either an
@@ -50,42 +54,77 @@ def get_codons_table(table_name, replace_U_by_T=True):
     or a short form "e_coli" which will be automatically extended to
     "e_coli_316407" (at your own risks).
 
+    If a taxonomic ID is provided and no table with this taxID is present in
+    the ``codon_usage_data/tables/`` folder, the table will be downloaded from
+    the http://www.kazusa.or.jp/codon website. As this website sometimes goes
+    down, the parameter ``web_timeout`` controls how long to wait before a
+    Python exception is raised, informing the user that Kazusa may be down.
+
     The ``replace_U_by_T`` argument will replace all codons names from UAA to
     TAA etc.
 
     Returns a dict {"*": {'TAA': 0.64...}, 'K': {'AAA': 0.76...}, ...}
+
+    
     """
     if replace_U_by_T:
-        table = get_codons_table(table_name, replace_U_by_T=False)
+        table = get_codons_table(
+            table_name, replace_U_by_T=False, web_timeout=5
+        )
         return table_with_U_replaced_by_T(table)
     if isinstance(table_name, int) or str.isdigit(table_name):
-        return download_codons_table(taxid=table_name)
+        return download_codons_table(taxid=table_name, timeout=web_timeout)
     if table_name in available_codon_tables_shortnames:
         table_name = available_codon_tables_shortnames[table_name]
-    with open(os.path.join(_tables_dir, table_name + '.csv'), 'r') as f:
+    with open(os.path.join(_tables_dir, table_name + ".csv"), "r") as f:
         return csv_string_to_codons_dict(f.read())
 
+
 def get_all_available_codons_tables(replace_U_by_T=True):
     """Get all data from all of this package's builtin codon usage tables."""
     return {
         table_name: get_codons_table(table_name, replace_U_by_T=replace_U_by_T)
         for table_name in available_codon_tables_names
     }
 
+
 @lru_cache(maxsize=128)
-def download_codons_table(taxid=316407, target_file=None):
+def download_codons_table(taxid=316407, target_file=None, timeout=5):
     """Get all data from all of this package's builtin codon usage tables."""
-    _kazusa_url = ("http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi"
-                   "?aa=1&style=N&species=%s")
+    _kazusa_url = (
+        "http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi"
+        "?aa=1&style=N&species=%s"
+    )
     _codon_regexpr = r"([ATGCU]{3}) ([A-Z]|\*) (\d.\d+)"
     url = _kazusa_url % taxid
-    html_content = urlopen(url).read().decode().replace("\n", " ")
-    csv_data = "\n".join(["amino_acid,codon,relative_frequency"] + sorted([
-        "%s,%s,%s" % (aa, codon, usage)
-        for codon, aa, usage in re.findall(_codon_regexpr, html_content)
-    ]))
+    try:
+        web_handle = urlopen(url, timeout=timeout)
+    except Exception as err:
+        if "timed out" in str(err):
+            raise RuntimeError(
+                (
+                    "connexion to %s timed out after %d seconds. Maybe "
+                    "their website is down?"
+                )
+                % (url, timeout)
+            )
+        else:
+            raise err
+
+    html_content = web_handle.read().decode().replace("\n", " ")
+    csv_data = "\n".join(
+        ["amino_acid,codon,relative_frequency"]
+        + sorted(
+            [
+                "%s,%s,%s" % (aa, codon, usage)
+                for codon, aa, usage in re.findall(
+                    _codon_regexpr, html_content
+                )
+            ]
+        )
+    )
     if target_file is not None:
         with open(target_file, "w+") as f:
             f.write(csv_data)
     else:
-        return csv_string_to_codons_dict(csv_data)
+        return csv_string_to_codons_dict(csv_data)