Skip to content

Commit f389fcc

Browse files
Merge pull request #87 from matrulda/develop-643_use_closest_read_length
Option to use closest read length
2 parents 622f4ac + 0de61dc commit f389fcc

File tree

8 files changed

+159
-12
lines changed

8 files changed

+159
-12
lines changed

checkQC/app.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@
2525
@click.option("--config", help="Path to the checkQC configuration file", type=click.Path())
2626
@click.option('--json', is_flag=True, default=False, help="Print the results of the run as json to stdout")
2727
@click.option('--downgrade-errors', type=str, multiple=True, help="Downgrade errors to warnings for a specific handler, can be used multiple times")
28+
@click.option('--use-closest-read-length', is_flag=True, default=False, help="Use the closest read length if the read length used isn't specified in the config")
2829
@click.version_option(checkqc_version)
2930
@click.argument('runfolder', type=click.Path())
30-
def start(config, json, downgrade_errors, runfolder):
31+
def start(config, json, downgrade_errors, use_closest_read_length, runfolder):
3132
"""
3233
checkQC is a command line utility designed to quickly gather and assess quality control metrics from an
3334
Illumina sequencing run. It is highly customizable and which quality controls modules should be run
@@ -36,7 +37,7 @@ def start(config, json, downgrade_errors, runfolder):
3637
# -----------------------------------
3738
# This is the application entry point
3839
# -----------------------------------
39-
app = App(runfolder, config, json, downgrade_errors)
40+
app = App(runfolder, config, json, downgrade_errors, use_closest_read_length)
4041
app.run()
4142
sys.exit(app.exit_status)
4243

@@ -46,11 +47,13 @@ class App(object):
4647
This is the main application object for CheckQC.
4748
"""
4849

49-
def __init__(self, runfolder, config_file=None, json_mode=False, downgrade_errors_for=()):
50+
def __init__(self, runfolder, config_file=None, json_mode=False,
51+
downgrade_errors_for=(), use_closest_read_length=False):
5052
self._runfolder = runfolder
5153
self._config_file = config_file
5254
self._json_mode = json_mode
5355
self._downgrade_errors_for = downgrade_errors_for
56+
self._use_closest_read_length = use_closest_read_length
5457
self.exit_status = 0
5558

5659
def configure_and_run(self):
@@ -73,7 +76,8 @@ def configure_and_run(self):
7376
# TODO For now assume symmetric read lengths
7477
both_read_lengths = run_type_recognizer.read_length()
7578
read_length = int(both_read_lengths.split("-")[0])
76-
handler_config = config.get_handler_configs(instrument_and_reagent_version, read_length, self._downgrade_errors_for)
79+
handler_config = config.get_handler_configs(instrument_and_reagent_version, read_length,
80+
self._downgrade_errors_for, self._use_closest_read_length)
7781

7882
run_type_summary = RunTypeSummarizer.summarize(instrument_and_reagent_version, both_read_lengths, handler_config)
7983

checkQC/config.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def __init__(self, config):
7878
"""
7979
self._config = config
8080

81-
def _get_matching_handler(self, instrument_and_reagent_type, read_length):
81+
def _get_matching_handler(self, instrument_and_reagent_type, read_length, use_closest_read_length=False):
8282
"""
8383
Get the handler matching the provided parameters.
8484
@@ -90,7 +90,6 @@ def _get_matching_handler(self, instrument_and_reagent_type, read_length):
9090

9191
try:
9292
config_read_lengths = list(map(str, self._config[instrument_and_reagent_type].keys()))
93-
9493
for config_read_length in config_read_lengths:
9594
if "-" in config_read_length:
9695
split_read_length = config_read_length.split("-")
@@ -101,6 +100,10 @@ def _get_matching_handler(self, instrument_and_reagent_type, read_length):
101100
else:
102101
if int(read_length) == int(config_read_length):
103102
return self._config[instrument_and_reagent_type][int(config_read_length)]["handlers"]
103+
if use_closest_read_length:
104+
closest_read_length = self._find_closest_read_length(config_read_lengths, read_length)
105+
log.info(f"Read length {read_length} not find in config. Using closest read length: {closest_read_length}")
106+
return self._config[instrument_and_reagent_type][closest_read_length]["handlers"]
104107
raise ConfigEntryMissing("Could not find a config entry matching read length '{}' on "
105108
"instrument '{}'. Please check the provided "
106109
"config.".format(read_length, instrument_and_reagent_type))
@@ -110,6 +113,28 @@ def _get_matching_handler(self, instrument_and_reagent_type, read_length):
110113
"file ".format(instrument_and_reagent_type,
111114
read_length))
112115

116+
def _find_closest_read_length(self, config_read_lengths, read_length):
117+
"""
118+
Find the closest read length in the config
119+
120+
:param config_read_lengths: dict with config read lengths for a specific intrument and reagent type
121+
:param read_length: either as a range, e.g. '50-70' or a single value, e.g. '50'
122+
:returns: the closest read length, as a string (if interval) or int (if single value)
123+
"""
124+
distance = {}
125+
for config_read_length in sorted(config_read_lengths, reverse=True):
126+
if "-" in config_read_length:
127+
split_read_length = config_read_length.split("-")
128+
distance[config_read_length] = min(abs(int(read_length) - int(split_read_length[0])),
129+
abs(int(read_length) - int(split_read_length[1])))
130+
else:
131+
distance[config_read_length] = abs(int(read_length) - int(config_read_length))
132+
closest_read_length = min(distance, key=distance.get)
133+
if "-" not in closest_read_length:
134+
return int(closest_read_length)
135+
else:
136+
return closest_read_length
137+
113138
def _add_default_config(self, current_handler_config):
114139
"""
115140
Add the default handlers specified in the config.
@@ -144,7 +169,8 @@ def _downgrade_errors(self, current_handler_config, downgrade_errors_for):
144169
downgraded_handler_config.append(handler)
145170
return downgraded_handler_config
146171

147-
def get_handler_configs(self, instrument_and_reagent_type, read_length, downgrade_errors_for=()):
172+
def get_handler_configs(self, instrument_and_reagent_type, read_length,
173+
downgrade_errors_for=(), use_closest_read_length=False):
148174
"""
149175
Get the handler configurations for the specified parameters.
150176
@@ -154,7 +180,8 @@ def get_handler_configs(self, instrument_and_reagent_type, read_length, downgrad
154180
"""
155181

156182
try:
157-
handler_config = self._get_matching_handler(instrument_and_reagent_type, read_length)
183+
handler_config = self._get_matching_handler(instrument_and_reagent_type,
184+
read_length, use_closest_read_length)
158185
handler_config_with_defaults = self._add_default_config(handler_config)
159186
downgraded_handler_config_with_defaults = self._downgrade_errors(
160187
handler_config_with_defaults,

checkQC/web_app.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@ def initialize(self, **kwargs):
2626
self.monitor_path = kwargs["monitoring_path"]
2727
self.qc_config_file = kwargs["qc_config_file"]
2828
self.downgrade_errors_for = ()
29+
self.use_closest_read_length = False
2930

3031
@staticmethod
31-
def _run_check_qc(monitor_path, qc_config_file, runfolder, downgrade_errors_for):
32+
def _run_check_qc(monitor_path, qc_config_file, runfolder, downgrade_errors_for,
33+
use_closest_read_length):
3234
path_to_runfolder = os.path.join(monitor_path, runfolder)
33-
checkqc_app = App(config_file=qc_config_file, runfolder=path_to_runfolder, downgrade_errors_for=downgrade_errors_for)
35+
checkqc_app = App(config_file=qc_config_file, runfolder=path_to_runfolder,
36+
downgrade_errors_for=downgrade_errors_for,
37+
use_closest_read_length=use_closest_read_length)
3438
reports = checkqc_app.configure_and_run()
3539
reports["version"] = checkqc_version
3640
return reports
@@ -43,8 +47,12 @@ def _write_error(self, status_code, reason):
4347
def get(self, runfolder):
4448
if "downgrade" in self.request.query_arguments:
4549
self.downgrade_errors_for = self.get_query_argument("downgrade")
50+
if "useClosestReadLength" in self.request.query_arguments:
51+
self.use_closest_read_length = True
4652
try:
47-
reports = self._run_check_qc(self.monitor_path, self.qc_config_file, runfolder, self.downgrade_errors_for)
53+
reports = self._run_check_qc(self.monitor_path, self.qc_config_file,
54+
runfolder, self.downgrade_errors_for,
55+
self.use_closest_read_length)
4856
self.set_header("Content-Type", "application/json")
4957
self.write(reports)
5058
except RunfolderNotFoundError:

docs/index.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,24 @@ This parameter can be supplied to the webservice as a query argument:
237237
238238
curl -s -w'\n' localhost:9999/qc/170726_D00118_0303_BCB1TVANXX?downgrade=ReadsPerSampleHandler,UndeterminedPercentageHandler | python -m json.tool
239239
240+
Use closest read length
241+
------------------------------
242+
243+
It is possible to instruct CheckQC to use the closest read length if the read length of the run is not found in the config.
244+
In case of a tie between two read lengths, the longer read length (with stricter QC criteria) will be used.
245+
246+
Usage:
247+
248+
.. code-block :: console
249+
250+
$ checkqc --use-closest-read-length <RUNFOLDER>
251+
252+
This parameter can be supplied to the webservice as a query argument:
253+
254+
.. code-block :: console
255+
256+
curl -s -w'\n' localhost:9999/qc/170726_D00118_0303_BCB1TVANXX?useClosestReadLength | python -m json.tool
257+
240258
Running CheckQC as a webservice
241259
-------------------------------
242260

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# NOTE
2+
# ----
3+
# This config is a partial copy of checkQC/default_config/config.yaml,
4+
# where the read length 126 is not specified for hiseq2500_rapidhighoutput_v4
5+
6+
# Use this section to provide configuration options to the parsers
7+
parser_configurations:
8+
StatsJsonParser:
9+
# Path to where the bcl2fastq output (i.e. fastq files, etc) is located relative to
10+
# the runfolder
11+
bcl2fastq_output_path: Data/Intensities/BaseCalls
12+
SamplesheetParser:
13+
samplesheet_name: SampleSheet.csv
14+
15+
default_handlers:
16+
- name: UndeterminedPercentageHandler
17+
warning: unknown
18+
error: 9 # <% Phix on lane> + < value as %>
19+
- name: UnidentifiedIndexHandler
20+
significance_threshold: 1 # % of reads in unidentified
21+
# Indexes which are white-listed will only cause a warning even if they occur
22+
# above the significance level.
23+
# They will be matched like regular expressions,
24+
# so e.g. NNN will match exactly three NNNs, while
25+
# N{3,} will match three or more Ns.
26+
white_listed_indexes:
27+
- .*N.*
28+
- G{6,}
29+
30+
hiseq2500_rapidhighoutput_v4:
31+
131:
32+
handlers:
33+
- name: ClusterPFHandler
34+
warning: 180 # Millions of clusters
35+
error: unknown
36+
- name: Q30Handler
37+
warning: 80 # Give percentage for reads greater than Q30
38+
error: unknown # Give percentage for reads greater than Q30
39+
- name: ErrorRateHandler
40+
allow_missing_error_rate: False
41+
warning: 2
42+
error: unknown
43+
- name: ReadsPerSampleHandler
44+
warning: unknown
45+
error: 90 # 50 % of threshold for clusters pass filter

tests/test_app.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,16 @@ def test_run_json_mode(self):
1919
# The test data contains fatal qc errors
2020
self.assertEqual(app.run(), 1)
2121

22+
def test_run_use_closest_read_length(self):
23+
config_file = os.path.join("tests", "resources", "read_length_not_in_config.yaml")
24+
app = App(runfolder=self.RUNFOLDER, config_file=config_file, use_closest_read_length=True)
25+
# The test data contains fatal qc errors
26+
self.assertEqual(app.run(), 1)
27+
2228
def test_run_downgrade_error(self):
2329
app = App(runfolder=self.RUNFOLDER, downgrade_errors_for="ReadsPerSampleHandler")
2430
# Test data should not produce fatal qc errors anymore
2531
self.assertEqual(app.run(), 0)
2632

27-
2833
if __name__ == '__main__':
2934
unittest.main()

tests/test_config.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ def setUp(self):
2020
'150-299': {'handlers': [
2121
self.second_handler]}
2222
},
23+
'hiseqx': {
24+
50: {'handlers': [
25+
self.first_handler]},
26+
52: {'handlers': [
27+
self.second_handler]}
28+
},
2329
"default_handlers": [
2430
self.default_handler,
2531
self.first_handler
@@ -55,6 +61,18 @@ def test_downgrade_errors(self):
5561
self.assertEqual(result[1]["error"], "unknown")
5662
self.assertEqual(result[1]["warning"], 100)
5763

64+
def test_use_closest_read_length(self):
65+
result = self.config._get_matching_handler('miseq_v3', 149, use_closest_read_length=True)
66+
self.assertEqual(result, [self.second_handler])
67+
68+
def test_use_closest_read_length_in_the_middle(self):
69+
result = self.config._get_matching_handler('hiseqx', 51, use_closest_read_length=True)
70+
self.assertEqual(result, [self.second_handler])
71+
72+
def test_machine_and_reagent_type_not_found(self):
73+
with self.assertRaises(ConfigEntryMissing):
74+
self.config._get_matching_handler('foo', 51, use_closest_read_length=True)
75+
5876

5977
class TestConfigFactory(unittest.TestCase):
6078

tests/test_web_app.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,25 @@ def get_app(self):
4242
def test_qc_fail_fast_for_unknown_config(self):
4343
response = self.fetch('/qc/170726_D00118_0303_BCB1TVANXX')
4444
self.assertEqual(response.code, 500)
45+
46+
47+
class TestWebAppReadLengthNotInConfig(AsyncHTTPTestCase):
48+
49+
def get_app(self):
50+
routes = WebApp._routes(monitoring_path=os.path.join("tests", "resources"),
51+
qc_config_file=os.path.join("tests", "resources", "read_length_not_in_config.yaml"))
52+
return tornado.web.Application(routes)
53+
54+
def test_use_closest_read_length(self):
55+
response = self.fetch('/qc/170726_D00118_0303_BCB1TVANXX?useClosestReadLength')
56+
result = json.loads(response.body)
57+
self.assertEqual(response.code, 200)
58+
# Test data produce fatal qc errors
59+
self.assertEqual(result["exit_status"], 1)
60+
61+
def test_use_closest_read_length_and_downgrade_errors(self):
62+
response = self.fetch('/qc/170726_D00118_0303_BCB1TVANXX?useClosestReadLength&downgrade=ReadsPerSampleHandler')
63+
result = json.loads(response.body)
64+
self.assertEqual(response.code, 200)
65+
# With ReadsPerSampleHandler downgraded, the test data should not produce fatal qc errors
66+
self.assertEqual(result["exit_status"], 0)

0 commit comments

Comments
 (0)