|
1 | 1 | #!/usr/bin/env python
|
2 |
| -import argparse |
3 | 2 | import glob
|
4 | 3 | import gzip
|
5 | 4 | import logging
|
6 | 5 | import multiprocessing
|
7 | 6 | import os
|
| 7 | +import sys |
8 | 8 | import uuid
|
9 | 9 | from collections import Counter
|
10 |
| -from datetime import datetime as dt |
11 | 10 | from itertools import groupby
|
12 | 11 |
|
13 | 12 | import numpy as np
|
@@ -39,7 +38,18 @@ def run_demux(args):
|
39 | 38 | ss = SampleSheet(args.samplesheet, args.ont_barcodes)
|
40 | 39 | version = pkg_resources.get_distribution("bio-anglerfish").version
|
41 | 40 | report = Report(args.run_name, run_uuid, version)
|
42 |
| - |
| 41 | + sys.stderr.write(""" |
| 42 | + ___ |
| 43 | + ( ) \ -..__ |
| 44 | + _.|~”~~~”…_ |
| 45 | + ^´ `>. |
| 46 | +(+ (+ ) “<..<^( |
| 47 | + `´ ``´ ___ ( |
| 48 | + \__..~ __( _…_( |
| 49 | + \ / |
| 50 | + “--…_ _..~%´ |
| 51 | + ```´´ |
| 52 | +""") |
43 | 53 | log.info(f" version {version}")
|
44 | 54 | log.info(f" arguments {vars(args)}")
|
45 | 55 | log.info(f" run uuid {run_uuid}")
|
@@ -118,7 +128,7 @@ def run_demux(args):
|
118 | 128 | "i5": {"i7_reversed": False, "i5_reversed": True},
|
119 | 129 | "i7+i5": {"i7_reversed": True, "i5_reversed": True},
|
120 | 130 | }
|
121 |
| - if args.force_rc is not None: |
| 131 | + if args.force_rc != "original": |
122 | 132 | log.info(
|
123 | 133 | f" Force reverse complementing {args.force_rc} index for adaptor {adaptor_name}. Lenient mode is disabled"
|
124 | 134 | )
|
@@ -245,105 +255,3 @@ def run_demux(args):
|
245 | 255 | report.write_report(args.out_fastq)
|
246 | 256 | report.write_json(args.out_fastq)
|
247 | 257 | report.write_dataframe(args.out_fastq, ss)
|
248 |
| - |
249 |
| - if args.skip_fastqc: |
250 |
| - log.warning( |
251 |
| - " As of version 0.4.1, built in support for FastQC + MultiQC is removed. The '-f' flag is redundant." |
252 |
| - ) |
253 |
| - |
254 |
| - |
255 |
| -def anglerfish(): |
256 |
| - parser = argparse.ArgumentParser( |
257 |
| - description="Tools to demux I7 and I5 barcodes when sequenced by single-molecules" |
258 |
| - ) |
259 |
| - parser.add_argument( |
260 |
| - "--samplesheet", |
261 |
| - "-s", |
262 |
| - required=True, |
263 |
| - help="CSV formatted list of samples and barcodes", |
264 |
| - ) |
265 |
| - parser.add_argument( |
266 |
| - "--out_fastq", |
267 |
| - "-o", |
268 |
| - default=".", |
269 |
| - help="Analysis output folder (default: Current dir)", |
270 |
| - ) |
271 |
| - parser.add_argument( |
272 |
| - "--threads", |
273 |
| - "-t", |
274 |
| - default=4, |
275 |
| - type=int, |
276 |
| - help="Number of threads to use (default: 4)", |
277 |
| - ) |
278 |
| - parser.add_argument( |
279 |
| - "--skip_demux", |
280 |
| - "-c", |
281 |
| - action="store_true", |
282 |
| - help="Only do BC counting and not demuxing", |
283 |
| - ) |
284 |
| - parser.add_argument( |
285 |
| - "--skip_fastqc", "-f", action="store_true", help=argparse.SUPPRESS |
286 |
| - ) |
287 |
| - parser.add_argument( |
288 |
| - "--max-distance", |
289 |
| - "-m", |
290 |
| - type=int, |
291 |
| - help="Manually set maximum edit distance for BC matching, automatically set this is set to either 1 or 2", |
292 |
| - ) |
293 |
| - parser.add_argument( |
294 |
| - "--max-unknowns", |
295 |
| - "-u", |
296 |
| - type=int, |
297 |
| - help="Maximum number of unknown indices to show in the output (default: length of samplesheet + 10)", |
298 |
| - ) |
299 |
| - parser.add_argument( |
300 |
| - "--run_name", |
301 |
| - "-r", |
302 |
| - default="anglerfish", |
303 |
| - help="Name of the run (default: anglerfish)", |
304 |
| - ) |
305 |
| - parser.add_argument( |
306 |
| - "--lenient", |
307 |
| - "-l", |
308 |
| - action="store_true", |
309 |
| - help="Will try reverse complementing the I5 and/or I7 indices and choose the best match.", |
310 |
| - ) |
311 |
| - parser.add_argument( |
312 |
| - "--lenient_factor", |
313 |
| - "-x", |
314 |
| - default=4.0, |
315 |
| - type=float, |
316 |
| - help="If lenient is set, this is the minimum factor of additional matches required to reverse complement the index (default: 4.0)", |
317 |
| - ) |
318 |
| - parser.add_argument( |
319 |
| - "--force_rc", |
320 |
| - "-p", |
321 |
| - choices=["i7", "i5", "i7+i5"], |
322 |
| - help="Force reverse complementing the I5 and/or I7 indices. This will disregard lenient mode.", |
323 |
| - ) |
324 |
| - parser.add_argument( |
325 |
| - "--ont_barcodes", |
326 |
| - "-n", |
327 |
| - action="store_true", |
328 |
| - help="Will assume the samplesheet refers to a single ONT run prepped with a barcoding kit. And will treat each barcode separately", |
329 |
| - ) |
330 |
| - parser.add_argument( |
331 |
| - "--debug", "-d", action="store_true", help="Extra commandline output" |
332 |
| - ) |
333 |
| - parser.add_argument( |
334 |
| - "--version", |
335 |
| - "-v", |
336 |
| - action="version", |
337 |
| - help="Print version and quit", |
338 |
| - version=f'anglerfish {pkg_resources.get_distribution("bio-anglerfish").version}', |
339 |
| - ) |
340 |
| - args = parser.parse_args() |
341 |
| - utcnow = dt.utcnow() |
342 |
| - runname = utcnow.strftime(f"{args.run_name}_%Y_%m_%d_%H%M%S") |
343 |
| - |
344 |
| - assert os.path.exists(args.out_fastq) |
345 |
| - assert os.path.exists(args.samplesheet) |
346 |
| - args.out_fastq = os.path.join(os.path.abspath(args.out_fastq), runname) |
347 |
| - args.samplesheet = os.path.abspath(args.samplesheet) |
348 |
| - args.run_name = runname |
349 |
| - run_demux(args) |
0 commit comments