Skip to content

Commit 6c9bf95

Browse files
Merge pull request #54 from MatildaAslin/issue_53
Read length equal to number of cycles
2 parents 8911c28 + cd4e38f commit 6c9bf95

File tree

8 files changed

+58
-41
lines changed

8 files changed

+58
-41
lines changed

checkQC/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11

2-
__version__ = "1.3.0"
2+
__version__ = "1.4.0"

checkQC/config.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
from pkg_resources import Requirement, resource_filename
33
import logging
4+
from checkQC.exceptions import ConfigEntryMissing
45

56
import yaml
67

@@ -84,19 +85,23 @@ def _get_matching_handler(self, instrument_and_reagent_type, read_length):
8485
:param instrument_and_reagent_type: the instrument and run type, e.g. 'hiseq2500_rapidhighoutput_v4'
8586
:param read_length: either as a range, e.g. '50-70' or a single value, e.g. '50'
8687
:returns: A dict corresponding to the handler config
88+
:raises: ConfigEntryMissing if no read length entry configured for the given instrument and reagent type matches the detected read length
8789
"""
8890
config_read_lengths = list(map(str, self._config[instrument_and_reagent_type].keys()))
8991
for config_read_length in config_read_lengths:
9092
if "-" in config_read_length:
9193
split_read_length = config_read_length.split("-")
9294
low_break = int(split_read_length[0])
9395
high_break = int(split_read_length[1])
94-
if low_break < int(read_length) <= high_break:
96+
if low_break <= int(read_length) <= high_break:
9597
return self._config[instrument_and_reagent_type][config_read_length]["handlers"]
9698
else:
9799
if int(read_length) == int(config_read_length):
98100
return self._config[instrument_and_reagent_type][int(config_read_length)]["handlers"]
99-
raise KeyError
101+
raise ConfigEntryMissing("Could not find a config entry for instrument '{}' "
102+
"with read length '{}'. Please check the provided config "
103+
"file ".format(instrument_and_reagent_type,
104+
read_length))
100105

101106
def _add_default_config(self, current_handler_config):
102107
"""
@@ -121,17 +126,19 @@ def get_handler_configs(self, instrument_and_reagent_type, read_length):
121126
:param read_length: give the read length either as str or int
122127
:returns: the corresponding handler configuration(s)
123128
"""
129+
124130
try:
125131
handler_config = self._get_matching_handler(instrument_and_reagent_type, read_length)
126132
handler_config_with_defaults = self._add_default_config(handler_config)
127133
return handler_config_with_defaults
128-
except KeyError as e:
134+
except ConfigEntryMissing as e:
129135
log.error("Could not find a config entry for instrument '{}' "
130136
"with read length '{}'. Please check the provided config "
131137
"file ".format(instrument_and_reagent_type,
132138
read_length))
133139
raise e
134140

141+
135142
def __getitem__(self, key):
136143
return self._config[key]
137144

checkQC/default_config/config.yaml

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,6 @@
11

22

3-
# Usage instruction for config
4-
# -----------------------------
5-
# - Please note that intervals for read lengths are specified as: min < x <= max (i.e. upper inclusive, lower exclusive)
6-
# - All other intervals are exclusive.
7-
# - Values that are specified under each handler, e.g.
8-
#
9-
# - name: ClusterPFHandler
10-
# warning: 180 # Millons of clusters
11-
# error: unknown
12-
#
13-
# are specific to that partiular handler, but in general any value can be substituted with "unknown", in which case
14-
# this will not be evaluated.
15-
#
16-
# - Handlers specified under "default_handlers" will be run regardless of instrument type. For all other cases
17-
# it is possible to specify handlers per instrument and read length interval.
18-
#
3+
# For information about config usage, see http://checkqc.readthedocs.io/en/latest/#configuration-file
194

205
# Use this section to provide configuration options to the parsers
216
parser_configurations:
@@ -30,7 +15,7 @@ default_handlers:
3015
error: 10
3116

3217
hiseq2500_rapidhighoutput_v4:
33-
50-70:
18+
51-71:
3419
handlers:
3520
- name: ClusterPFHandler
3621
warning: 180 # Millions of clusters
@@ -45,7 +30,7 @@ hiseq2500_rapidhighoutput_v4:
4530
- name: ReadsPerSampleHandler
4631
warning: unknown
4732
error: 90 # 50 % of threshold for clusters pass filter
48-
100-110:
33+
101-111:
4934
handlers:
5035
- name: ClusterPFHandler
5136
warning: 180 # Millions of clusters
@@ -60,7 +45,7 @@ hiseq2500_rapidhighoutput_v4:
6045
- name: ReadsPerSampleHandler
6146
warning: unknown
6247
error: 90 # 50 % of threshold for clusters pass filter
63-
120-130:
48+
121-131:
6449
handlers:
6550
- name: ClusterPFHandler
6651
warning: 180 # Millions of clusters
@@ -77,7 +62,7 @@ hiseq2500_rapidhighoutput_v4:
7762
error: 90 # 50 % of threshold for clusters pass filter
7863

7964
hiseq2500_rapidrun_v2:
80-
50:
65+
51:
8166
handlers:
8267
- name: ClusterPFHandler
8368
warning: 110 # Millions of clusters
@@ -92,7 +77,7 @@ hiseq2500_rapidrun_v2:
9277
- name: ReadsPerSampleHandler
9378
warning: unknown
9479
error: 55 # 50 % of threshold for clusters pass filter
95-
100:
80+
101:
9681
handlers:
9782
- name: ClusterPFHandler
9883
warning: 110 # Millions of clusters
@@ -107,7 +92,7 @@ hiseq2500_rapidrun_v2:
10792
- name: ReadsPerSampleHandler
10893
warning: unknown
10994
error: 55 # 50 % of threshold for clusters pass filter
110-
250:
95+
251:
11196
handlers:
11297
- name: ClusterPFHandler
11398
warning: 110 # Millions of clusters
@@ -124,7 +109,7 @@ hiseq2500_rapidrun_v2:
124109
error: 55 # 50 % of threshold for clusters pass filter
125110

126111
hiseqx_v2:
127-
150:
112+
151:
128113
handlers:
129114
- name: ClusterPFHandler
130115
warning: 400 # Millions of clusters
@@ -141,7 +126,7 @@ hiseqx_v2:
141126
error: 200 # 50 % of threshold for clusters pass filter
142127

143128
novaseq_v1:
144-
150:
129+
151:
145130
handlers:
146131
- name: ClusterPFHandler
147132
warning: 1400 # Millions of clusters
@@ -158,7 +143,7 @@ novaseq_v1:
158143
error: 700 # 50 % of threshold for clusters pass filter
159144

160145
miseq_v2:
161-
50:
146+
51:
162147
handlers:
163148
- name: ClusterPFHandler
164149
warning: 10 # Millions of clusters
@@ -173,7 +158,7 @@ miseq_v2:
173158
- name: ReadsPerSampleHandler
174159
warning: unknown
175160
error: 5 # 50 % of threshold for clusters pass filter
176-
150:
161+
151:
177162
handlers:
178163
- name: ClusterPFHandler
179164
warning: 10 # Millions of clusters
@@ -188,7 +173,7 @@ miseq_v2:
188173
- name: ReadsPerSampleHandler
189174
warning: unknown
190175
error: 5 # 50 % of threshold for clusters pass filter
191-
250:
176+
251:
192177
handlers:
193178
- name: ClusterPFHandler
194179
warning: 10 # Millions of clusters
@@ -205,7 +190,7 @@ miseq_v2:
205190
error: 5 # 50 % of threshold for clusters pass filter
206191

207192
miseq_v3:
208-
75:
193+
76:
209194
handlers:
210195
- name: ClusterPFHandler
211196
warning: 18 # Millions of clusters
@@ -220,7 +205,7 @@ miseq_v3:
220205
- name: ReadsPerSampleHandler
221206
warning: unknown
222207
error: 9 # 50 % of threshold for clusters pass filter
223-
300:
208+
301:
224209
handlers:
225210
- name: ClusterPFHandler
226211
warning: 18 # Millions of clusters

checkQC/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,7 @@ class QCHandlerNotFound(CheckQCException):
3939

4040
class ConfigurationError(CheckQCException):
4141
pass
42+
43+
44+
class ConfigEntryMissing(CheckQCException):
45+
pass

checkQC/run_type_recognizer.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,7 @@ def read_length(self):
215215
read_lengths = []
216216
for read in reads:
217217
if not read['@IsIndexedRead'] == 'Y':
218-
# The -1 is necessary for the number of cycles to correspond to the
219-
# way it is specified in the docs. I.e. read length 300 in the docs
220-
# means 301 cycles were run...
221-
read_lengths.append(int(read['@NumCycles']) - 1)
218+
read_lengths.append(int(read['@NumCycles']))
222219

223220
if len(read_lengths) < 1:
224221
raise RunModeUnknown("Found no NumCycles in RunInfo.xml, could not determine read length")

docs/index.rst

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Instrument types supported in checkQC are the following:
1818
- HiSeq2500
1919
- MiSeq
2020
- NovaSeq
21-
21+
2222
Install instructions
2323
--------------------
2424
CheckQC **requires Python 3.5** (or higher) to run. Furthermore, right now the Illumina Interop
@@ -49,7 +49,7 @@ your own custom file, you can do so by adding a path to the config like this:
4949
5050
When CheckQC starts and no path to the config file is specified it will give you
5151
the path to where the default file is located on your system, if you want a template
52-
that you can customize according to your own needs.
52+
that you can customize according to your own needs. See section `Configuration file`_ for more information.
5353

5454
When you run CheckQC you can expect to see output similar to this:
5555

@@ -199,6 +199,28 @@ In this example we use the python json tool to pretty print the json output:
199199
}
200200
}
201201
202+
Configuration file
203+
------------------
204+
205+
- The location of the default config is printed when running CheckQC without the ``--config`` flag.
206+
It can be used as a template when making a customized config.
207+
208+
- Read length is defined as the number of cycles run for a read.
209+
210+
- All intervals for read lengths are specified as: min <= x <= max (i.e. both bounds are inclusive).
211+
212+
- All other intervals are exclusive.
213+
214+
- Values that are specified under each handler are specific to that particular handler, but in general any value
215+
can be substituted with "unknown", in which case this will not be evaluated.
216+
217+
- Handlers specified under "default_handlers" will be run regardless of instrument type. For all other cases it
218+
is possible to specify handlers per instrument and read length interval.
219+
220+
- Apart from QC thresholds, the config also contains parser configurations, where parser-specific variables can be set.
221+
The Stats.json parser has a bcl2fastq_output_path variable that can be set to specify where bcl2fastq output is located
222+
relative to the runfolder. Default value is "Data/Intensities/BaseCalls".
223+
202224
Running CheckQC as a webservice
203225
-------------------------------
204226

tests/test_config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from checkQC.config import Config, ConfigFactory
55

6+
from checkQC.exceptions import ConfigEntryMissing
7+
68

79
class TestConfig(unittest.TestCase):
810

@@ -33,7 +35,7 @@ def test_interval_match(self):
3335
self.assertListEqual(handlers, [self.second_handler, self.default_handler, self.first_handler])
3436

3537
def test_no_match(self):
36-
with self.assertRaises(KeyError):
38+
with self.assertRaises(ConfigEntryMissing):
3739
self.config.get_handler_configs('miseq_v3', 999)
3840

3941
def test_call_with_str(self):

tests/test_run_type_recognizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_instrument_type(self):
1919
self.assertEqual(expected, actual.name())
2020

2121
def test_read_length(self):
22-
expected = "50"
22+
expected = "51"
2323
actual = self.runtype_recognizer.read_length()
2424
self.assertEqual(expected, actual)
2525

0 commit comments

Comments
 (0)