Skip to content

Commit 19e1267

Browse files
authored
Merge pull request #67 from johandahlberg/iseq_support
Support for iSeq, and improved org for testing
2 parents 231d5b6 + 76af5e0 commit 19e1267

File tree

5 files changed

+126
-38
lines changed

5 files changed

+126
-38
lines changed

checkQC/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def configure_and_run(self):
6060
try:
6161
config = ConfigFactory.from_config_path(self._config_file)
6262
parser_configurations = config.get("parser_configurations", None)
63-
run_type_recognizer = RunTypeRecognizer(config=config, runfolder=self._runfolder)
63+
run_type_recognizer = RunTypeRecognizer(runfolder=self._runfolder)
6464
instrument_and_reagent_version = run_type_recognizer.instrument_and_reagent_version()
6565

6666
# TODO For now assume symmetric read lengths

checkQC/default_config/config.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,20 @@ miseq_v3:
314314
- name: ReadsPerSampleHandler
315315
warning: unknown
316316
error: 9 # 50 % of threshold for clusters pass filter
317+
318+
iseq_v1:
319+
300:
320+
handlers:
321+
- name: ClusterPFHandler
322+
warning: 4 # Millions of clusters
323+
error: unknown
324+
- name: Q30Handler
325+
warning: 80 # Give percentage for reads greater than Q30
326+
error: unknown # Give percentage for reads greater than Q30
327+
- name: ErrorRateHandler
328+
allow_missing_error_rate: False
329+
warning: 1.5
330+
error: unknown
331+
- name: ReadsPerSampleHandler
332+
warning: unknown
333+
error: 2 # 50 % of threshold for clusters pass filter

checkQC/run_type_recognizer.py

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11

2-
import os
32
import logging
4-
import xmltodict
53

64
from checkQC.exceptions import *
5+
from checkQC.runfolder_reader import RunfolderReader
76

87
log = logging.getLogger(__name__)
98

@@ -57,6 +56,15 @@ def reagent_version(runtype_recognizer):
5756
"""
5857
raise NotImplementedError
5958

59+
class ISeq(IlluminaInstrument):
    """
    Instrument class for the iSeq. It is identified by the name "iseq" and always
    reports the reagent version "v1".
    """

    @staticmethod
    def name():
        """
        :return: the name of this instrument type, i.e. "iseq"
        """
        instrument_name = "iseq"
        return instrument_name

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        :param runtype_recognizer: not used, the same version is returned regardless of its content
        :return: the reagent version, i.e. "v1"
        """
        version = "v1"
        return version
6068

6169
class NovaSeq(IlluminaInstrument):
6270

@@ -142,42 +150,18 @@ class RunTypeRecognizer(object):
142150
The runfolder needs to have a 'RunInfo.xml' and a '[R|r]unParameters.xml' file.
143151
"""
144152

145-
def __init__(self, config, runfolder):
153+
def __init__(self, runfolder, runfolder_reader=RunfolderReader()):
146154
"""
147155
Create a RunTypeRecognizer instance
148156
149-
:param config: dictionary containing the app configuration
150157
:param runfolder: to gather data about
158+
:param runfolder_reader: reader class for runfolders, defaults to RunfolderReader. Here to make testing
159+
easier.
151160
"""
152-
self._config = config
153161
self._runfolder = runfolder
154-
try:
155-
run_info_path = os.path.join(self._runfolder, "RunInfo.xml")
156-
if not os.path.exists(run_info_path):
157-
log.error("Could not find a RunInfo.xml in {}. Are you sure this is a runfolder?".format(run_info_path))
158-
raise FileNotFoundError("Could not find {}".format(run_info_path))
159-
with open(run_info_path) as f:
160-
self.run_info = xmltodict.parse(f.read())
161-
except FileNotFoundError:
162-
raise RunInfoXMLNotFound("Could not find RunInfo.xml at {}".format(run_info_path))
162+
self.run_info = runfolder_reader.read_run_info_xml(runfolder)
163+
self.run_parameters = runfolder_reader.read_run_parameters_xml(runfolder)
163164

164-
try:
165-
with open(self._find_run_parameters_xml()) as f:
166-
self.run_parameters = xmltodict.parse(f.read())
167-
except FileNotFoundError:
168-
raise RunParametersNotFound("Could not find [R|r]unParameters.xml for runfolder {}".format(self._runfolder))
169-
170-
def _find_run_parameters_xml(self):
171-
first_option = os.path.join(self._runfolder, "RunParameters.xml")
172-
second_option = os.path.join(self._runfolder, "runParameters.xml")
173-
if os.path.isfile(first_option):
174-
return first_option
175-
elif os.path.isfile(second_option):
176-
return second_option
177-
else:
178-
log.error("Could not find [R|r]unParameters.xml in directory {}. "
179-
"Are you sure this is a runfolder?".format(self._runfolder))
180-
raise FileNotFoundError("Could not find [R|r]unParameters.xml for runfolder {}".format(self._runfolder))
181165

182166
def instrument_type(self):
183167
"""
@@ -191,7 +175,8 @@ def instrument_type(self):
191175
machine_type_mappings = {"M": "miseq",
192176
"D": "hiseq2500",
193177
"ST": "hiseqx",
194-
"A": "novaseq"}
178+
"A": "novaseq",
179+
"FFSP": "iseq"}
195180

196181
for key, value in machine_type_mappings.items():
197182
if instrument_name.startswith(key):

checkQC/runfolder_reader.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
2+
import xmltodict
3+
import os
4+
import logging
5+
from checkQC.exceptions import RunParametersNotFound, RunInfoXMLNotFound
6+
7+
log = logging.getLogger(__name__)
8+
9+
10+
class RunfolderReader(object):
    """
    Collection of static helpers for reading the XML files of a runfolder, i.e. the
    RunInfo.xml and the [R|r]unParameters.xml, and returning their content as dicts.
    """

    @staticmethod
    def read_run_parameters_xml(runfolder):
        """
        Locate the run parameters file of the runfolder (it is called either
        RunParameters.xml or runParameters.xml), parse it and return the result.
        :param runfolder: to look in
        :return: the [R|r]unParameters.xml as a dict
        :raises: RunParametersNotFound if no [R|r]unParameters.xml was found
        """
        try:
            parameters_path = RunfolderReader.find_run_parameters_xml(runfolder)
            with open(parameters_path) as parameters_file:
                xml_content = parameters_file.read()
                return xmltodict.parse(xml_content)
        except FileNotFoundError:
            message = "Could not find [R|r]unParameters.xml for runfolder {}".format(runfolder)
            raise RunParametersNotFound(message)

    @staticmethod
    def read_run_info_xml(runfolder):
        """
        Parse the RunInfo.xml found directly in the runfolder and return the result.
        :param runfolder: to look in
        :return: RunInfo.xml data as a dict
        :raises: RunInfoXMLNotFound if there is no RunInfo.xml in the runfolder
        """
        run_info_path = os.path.join(runfolder, "RunInfo.xml")
        try:
            if os.path.exists(run_info_path):
                with open(run_info_path) as run_info_file:
                    return xmltodict.parse(run_info_file.read())
            # Nothing at the expected path: log it, then let the handler below
            # translate the error into the project specific exception.
            log.error("Could not find a RunInfo.xml in {}. Are you sure this is a runfolder?".format(run_info_path))
            raise FileNotFoundError("Could not find {}".format(run_info_path))
        except FileNotFoundError:
            raise RunInfoXMLNotFound("Could not find RunInfo.xml at {}".format(run_info_path))

    @staticmethod
    def find_run_parameters_xml(runfolder):
        """
        Work out which of RunParameters.xml and runParameters.xml exists in the runfolder.
        RunParameters.xml is preferred if both are present.
        :param runfolder: to look in
        :return: the path to the [r|R]unParameters.xml
        :raises: FileNotFoundError if neither file exists
        """
        # Build both candidate paths up front, in order of preference.
        candidates = [os.path.join(runfolder, file_name)
                      for file_name in ("RunParameters.xml", "runParameters.xml")]
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate

        log.error("Could not find [R|r]unParameters.xml in directory {}. "
                  "Are you sure this is a runfolder?".format(runfolder))
        raise FileNotFoundError("Could not find [R|r]unParameters.xml for runfolder {}".format(runfolder))

tests/test_run_type_recognizer.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
from unittest import TestCase
2+
import mock
23

34
import os
45

56
from checkQC.exceptions import RunModeUnknown, ReagentVersionUnknown
6-
from checkQC.run_type_recognizer import RunTypeRecognizer, HiSeq2500, MiSeq, NovaSeq
7-
class TestRunTypeRecognizer(TestCase):
7+
from checkQC.run_type_recognizer import RunTypeRecognizer, HiSeq2500, MiSeq, NovaSeq, ISeq
8+
from checkQC.runfolder_reader import RunfolderReader
89

9-
CONFIG = {"instrument_type_mappings":{"M": "miseq","D": "hiseq2500"}}
10+
11+
class TestRunTypeRecognizerFromFolder(TestCase):
1012

1113
def setUp(self):
1214
self.runtype_recognizer = RunTypeRecognizer(runfolder=os.path.join(os.path.dirname(__file__),
13-
"resources", "Rapid"),
14-
config=self.CONFIG)
15+
"resources", "Rapid"))
1516

1617
def test_instrument_type(self):
1718
expected = "hiseq2500"
@@ -29,6 +30,25 @@ def test_run_mode(self):
2930
self.assertEqual(expected, actual)
3031

3132

33+
class TestRunTypeRecognizerCorrectInstrumentsReturned(TestCase):
    """
    Checks that RunTypeRecognizer.instrument_type returns an instance of the expected
    instrument class for a given instrument name. The RunfolderReader is patched, so
    no runfolder has to exist on disk.
    """

    def _create_runtype_recognizer(self, instrument_name):
        """
        Build a RunTypeRecognizer whose run info contains only the given instrument name.

        :param instrument_name: value to place at RunInfo -> Run -> Instrument
        :return: a RunTypeRecognizer created while the RunfolderReader read methods were patched
        """
        run_info = {"RunInfo": {"Run": {"Instrument": instrument_name}}}
        with mock.patch.object(RunfolderReader, 'read_run_info_xml', return_value=run_info), \
                mock.patch.object(RunfolderReader, 'read_run_parameters_xml', return_value=None):
            return RunTypeRecognizer(runfolder='foo')

    def test_returns_hiseq2500(self):
        runtyperecognizer = self._create_runtype_recognizer("D1000")
        actual = runtyperecognizer.instrument_type()
        # assertIsInstance reports the actual type on failure, unlike assertTrue(isinstance(...))
        self.assertIsInstance(actual, HiSeq2500)

    def test_returns_iseq(self):
        runtyperecognizer = self._create_runtype_recognizer("FFSP100")
        actual = runtyperecognizer.instrument_type()
        self.assertIsInstance(actual, ISeq)
50+
51+
3252
class TestIlluminaInstrument(TestCase):
3353

3454
class MockRunTypeRecognizer():
@@ -39,6 +59,7 @@ def setUp(self):
3959
self.hiseq2500 = HiSeq2500()
4060
self.miseq = MiSeq()
4161
self.novaseq = NovaSeq()
62+
self.iseq = ISeq()
4263

4364
def test_all_is_well(self):
4465
runtype_dict = {"RunParameters": {"Setup": {"RunMode": "RapidHighOutput", "Sbs": "HiSeq SBS Kit v4"}}}

0 commit comments

Comments
 (0)