Skip to content

Commit 7b4ce7e

Browse files
authored
Merge pull request #25 from mariya/DEVELOP-1099-instrument-specific-completed-marker-file
DEVELOP-1099 - use custom completed marker files for different instruments
2 parents 7f7dfdd + dca0f1c commit 7b4ce7e

File tree

8 files changed

+181
-34
lines changed

8 files changed

+181
-34
lines changed

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,19 @@ This means that the client (e.g. a workflow) is responsible for updating the sta
2323

2424
**Try it out:**
2525

26-
Install using pip:
26+
# create venv
27+
virtualenv -p python2.7 venv/
2728

28-
pip install -r requirements/dev . # possible add -e if you're in development mode.
29+
# activate venv
30+
source venv/bin/activate
31+
32+
# install dependencies
33+
pip install -e . -r ./requirements/dev
2934

3035
Open up the `config/app.config` and specify the root directories that you want to monitor for runfolders. Then run:
3136

32-
runfolder-ws --port 9999 --configroot config/
37+
# start the dev server
38+
python server.py --debug --port=9999 --configroot='./config'
3339

3440
This will start the runfolder service on port 9999, and the API docs will be available under `localhost:9999/api`.
3541
Try curl-ing to see what you can do with it:
@@ -47,7 +53,3 @@ Alternatively, you can run the same script against a remote server, specifying t
4753

4854
Unit tests can be run with
4955
nosetests ./runfolder_tests/unit
50-
51-
**Install in production**
52-
One way to install this as a daemon in a production environment
53-
can be seen at https://github.com/arteria-project/arteria-provisioning

config/app.config

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,6 @@ monitored_directories:
99
# also enables adding the runfolder-ready marker through the API.
1010
can_create_runfolder: False
1111

12-
# An Illumina machine will write a RTAComplete.txt file when it has
13-
# finished the sequencing process. But when a user manually moves
14-
# the runfolder to the processing directory one can not guaranty
15-
# that all files have been moved when the RTAComplete.tx exist in
16-
# the new directory. The completed_marker_file can be changed to
17-
# another file, a file which for example will signal that the
18-
# transfer process has completed.
19-
completed_marker_file:
20-
- RTAComplete.txt
21-
- SequencingComplete.txt
22-
2312
# It seems that sometimes the completed_marker_files are not a
2413
# reliable indicator that nothing more will be modified or written
2514
# in the sequencing runfolder. Therefore a grace period can be

runfolder/lib/__init__.py

Whitespace-only changes.

runfolder/lib/instrument.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Typically, an Illumina machine writes a RTAComplete.txt file when it has
2+
# finished the sequencing process. However, some instruments may use another
3+
# completed_marker_file, for instance one that indicates that files have
4+
# been transferred.
5+
6+
import re
7+
8+
9+
class InstrumentFactory(object):
    """Pick the Instrument class that matches a run's instrument id."""

    @staticmethod
    def get_id(run_parameters):
        """Extract the instrument id from parsed run parameters.

        :param run_parameters: dict-like object whose 'RunParameters' entry
            holds the instrument attributes.
        :return: the instrument id, or None when no id can be found (this
            includes a missing or empty 'RunParameters' entry).
        """
        # A missing 'RunParameters' entry is treated as "no parameters"
        # instead of crashing on None.get(...).
        params = (run_parameters or {}).get('RunParameters') or {}

        # HiSeq / HiSeq X instrument stored in Setup#ScannerID
        val = (params.get('Setup') or {}).get('ScannerID')
        if val:
            return val

        # Other instrument id's are at the top level of RunParameters
        for key in ("InstrumentName", "InstrumentId", "ScannerID"):
            if key in params:
                return params.get(key)

        return None

    @staticmethod
    def get_instrument(run_parameters):
        """Return an instrument object for the given run parameters.

        :param run_parameters: parsed run parameters, or a falsy value when
            none are available.
        :return: an instance of the first Instrument subclass whose
            ID_PATTERN matches the instrument id; a plain Instrument when
            the parameters are empty, carry no id, or match no pattern.
        """
        if not run_parameters:
            return Instrument()

        instrument_id = InstrumentFactory.get_id(run_parameters)

        if not instrument_id:
            return Instrument()

        # Checked in this fixed order; the first matching pattern wins.
        for instrument_class in (NovaSeq, ISeq, MiSeq, HiSeq, HiSeqX):
            if re.search(instrument_class.ID_PATTERN, instrument_id):
                return instrument_class()
        return Instrument()
45+
46+
47+
class Instrument(object):
    """Default instrument; also the base class for the specific models.

    Derives from ``object`` so that it is a new-style class on Python 2.
    """

    # Names of the files whose presence in a runfolder marks it as completed.
    COMPLETED_MARKER_FILE_RTA_COMPLETE = 'RTAComplete.txt'
    COMPLETED_MARKER_FILE_COPY_COMPLETE = 'CopyComplete.txt'

    @staticmethod
    def completed_marker_file():
        """Return the name of the completed marker file ('RTAComplete.txt')."""
        return Instrument.COMPLETED_MARKER_FILE_RTA_COMPLETE
54+
55+
56+
class NovaSeq(Instrument):
    """Instrument selected for ids matching ID_PATTERN (ids starting with 'A')."""

    # Regular expression the instrument id is searched with.
    ID_PATTERN = '^A'

    @staticmethod
    def completed_marker_file():
        """Return the copy-complete marker file name instead of the default."""
        marker_file = Instrument.COMPLETED_MARKER_FILE_COPY_COMPLETE
        return marker_file
62+
63+
64+
class ISeq(Instrument):
    """Instrument selected for ids matching ID_PATTERN (ids starting with 'FS')."""

    # Regular expression the instrument id is searched with.
    ID_PATTERN = '^FS'

    @staticmethod
    def completed_marker_file():
        """Return the copy-complete marker file name instead of the default."""
        marker_file = Instrument.COMPLETED_MARKER_FILE_COPY_COMPLETE
        return marker_file
70+
71+
72+
class MiSeq(Instrument):
    """Instrument selected for ids starting with 'M'.

    Does not override completed_marker_file(), so the Instrument default applies.
    """

    # Regular expression the instrument id is searched with.
    ID_PATTERN = '^M'
74+
75+
76+
class HiSeq(Instrument):
    """Instrument selected for ids starting with 'D'.

    Does not override completed_marker_file(), so the Instrument default applies.
    """

    # Regular expression the instrument id is searched with.
    ID_PATTERN = '^D'
78+
79+
80+
class HiSeqX(HiSeq):
    """HiSeq variant selected for ids starting with 'ST-E'.

    Only the id pattern differs from HiSeq; everything else is inherited.
    """

    # Regular expression the instrument id is searched with.
    ID_PATTERN = '^ST-E'

runfolder/services.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from arteria.web.state import State
99
from arteria.web.state import validate_state
10+
from runfolder.lib.instrument import InstrumentFactory
1011

1112
class RunfolderInfo:
1213
"""
@@ -111,7 +112,8 @@ def create_runfolder(self, path):
111112
'ReagentKitBarcode': 'AB1234567-123V1',
112113
'RfidsInfo': {
113114
'LibraryTubeSerialBarcode': 'NV0012345-LIB'
114-
}
115+
},
116+
'ScannerID': 'M04499'
115117
}
116118
}
117119

@@ -186,13 +188,8 @@ def get_runfolder_state(self, runfolder):
186188
If the file .arteria/state exists, it will determine the state. If it doesn't
187189
exist, the existence of the marker file RTAComplete.txt determines the state.
188190
"""
189-
completed_marker_files = None
190-
try:
191-
completed_marker_files = self._configuration_svc["completed_marker_file"]
192-
if isinstance(completed_marker_files, str):
193-
completed_marker_files = [completed_marker_files]
194-
except KeyError:
195-
raise ConfigurationError("completed_marker_file must be set")
191+
instrument = InstrumentFactory.get_instrument(self.read_run_parameters(runfolder))
192+
completed_marker_file = instrument.completed_marker_file()
196193
completed_grace_minutes = None
197194
try:
198195
completed_grace_minutes = self._configuration_svc["completed_marker_grace_minutes"]
@@ -204,10 +201,9 @@ def get_runfolder_state(self, runfolder):
204201
state = self._get_runfolder_state_from_state_file(runfolder)
205202
if state == State.NONE:
206203
ready = True
207-
for marker_file in completed_marker_files:
208-
completed_marker = os.path.join(runfolder, marker_file)
209-
if not self._file_exists_and_is_older_than(completed_marker, completed_grace_minutes):
210-
ready = False
204+
completed_marker = os.path.join(runfolder, completed_marker_file)
205+
if not self._file_exists_and_is_older_than(completed_marker, completed_grace_minutes):
206+
ready = False
211207
if ready:
212208
state = State.READY
213209
return state
@@ -293,6 +289,7 @@ def get_metadata(self, path):
293289
metadata['library_tube_barcode'] = library_tube_barcode
294290
return metadata
295291

292+
296293
def get_reagent_kit_barcode(self, path, run_parameters):
297294
try:
298295
barcode = run_parameters['RunParameters']['ReagentKitBarcode']
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import unittest
2+
import logging
3+
import mock
4+
5+
6+
from runfolder.lib.instrument import InstrumentFactory, Instrument
7+
8+
9+
logger = logging.getLogger(__name__)
10+
11+
class InstrumentTestCase(unittest.TestCase):
    """Checks the instrument class and marker file chosen for various ids."""

    def _assert_instrument(self, instrument_attr, class_name, marker_file):
        """Build run parameters from instrument_attr and check the result."""
        run_parameters = {'RunParameters': instrument_attr}
        instrument = InstrumentFactory.get_instrument(run_parameters)
        self.assertEqual(instrument.__class__.__name__, class_name)
        self.assertEqual(instrument.completed_marker_file(), marker_file)

    def test_get_instrument_by_id(self):
        # (attributes under 'RunParameters', expected class, expected marker)
        cases = [
            # HiSeqX
            ({'Setup': {'ScannerID': 'ST-E123'}}, 'HiSeqX', 'RTAComplete.txt'),
            # HiSeq
            ({'Setup': {'ScannerID': 'D999'}}, 'HiSeq', 'RTAComplete.txt'),
            # NovaSeq
            ({'InstrumentName': 'ABC'}, 'NovaSeq', 'CopyComplete.txt'),
            # iSeq
            ({'InstrumentId': 'FS1'}, 'ISeq', 'CopyComplete.txt'),
            # MiSeq
            ({'ScannerID': 'M1'}, 'MiSeq', 'RTAComplete.txt'),
            # Default
            ({'ScannerID': 'foo'}, 'Instrument', 'RTAComplete.txt'),
        ]
        for instrument_attr, class_name, marker_file in cases:
            self._assert_instrument(instrument_attr, class_name, marker_file)
72+
73+
74+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

runfolder_tests/unit/runfolder_tests.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ def test_list_available_runfolders(self):
2828
"monitored_directories": [
2929
"/data/testarteria1/mon1",
3030
"/data/testarteria1/mon2"
31-
],
32-
"completed_marker_file": "RTAComplete.txt"
31+
]
3332
}
3433
runfolder_svc = RunfolderService(configuration_svc, logger)
3534

@@ -53,8 +52,7 @@ def test_next_runfolder(self):
5352
configuration_svc = {
5453
"monitored_directories": [
5554
"/data/testarteria1/mon1"
56-
],
57-
"completed_marker_file": "RTAComplete.txt"
55+
]
5856
}
5957

6058
# Since keys in configuration_svc can be directly indexed, we can mock it with a dict:

server.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Script entry point: running this file directly starts the runfolder app.
from runfolder.app import start


if __name__ == "__main__":
    start()

0 commit comments

Comments
 (0)