28
28
29
29
MAX_PROCESSES = 64 # Ought to be enough for anybody
30
30
31
- anglerfish_logo = r"""
32
- ___
33
- ( ) \ -..__
34
- _.|~”~~~”…_
35
- ^´ `>.
36
- (+ (+ ) “<..<^(
37
- `´ ``´ ___ (
38
- \__..~ __( _…_(
39
- \ /
40
- “--…_ _..~%´
41
- ```´´
42
- """
43
-
44
31
45
32
def run_demux (args ):
46
33
multiprocessing .set_start_method ("spawn" )
@@ -51,54 +38,44 @@ def run_demux(args):
51
38
ss = SampleSheet (args .samplesheet , args .ont_barcodes )
52
39
version = pkg_resources .get_distribution ("bio-anglerfish" ).version
53
40
report = Report (args .run_name , run_uuid , version )
54
- sys .stderr .write (anglerfish_logo )
41
+ sys .stderr .write ("""
42
+ ___
43
+ ( ) \ -..__
44
+ _.|~”~~~”…_
45
+ ^´ `>.
46
+ (+ (+ ) “<..<^(
47
+ `´ ``´ ___ (
48
+ \__..~ __( _…_(
49
+ \ /
50
+ “--…_ _..~%´
51
+ ```´´
52
+ """ )
55
53
log .info (f" version { version } " )
56
54
log .info (f" arguments { vars (args )} " )
57
55
log .info (f" run uuid { run_uuid } " )
58
- min_distance = ss .minimum_bc_distance ()
56
+ bc_dist = ss .minimum_bc_distance ()
59
57
if args .max_distance is None :
60
- # Default: Set the maximum distance for barcode matching to 0, 1 or 2
61
- # depending on the smallest detected edit distance between indices in the samplesheet
62
- args .max_distance = min (min_distance - 1 , 2 )
58
+ if bc_dist > 1 :
59
+ args .max_distance = 2
60
+ else :
61
+ args .max_distance = 1
63
62
log .info (f"Using maximum edit distance of { args .max_distance } " )
64
- if args .max_distance >= min_distance :
63
+ if args .max_distance >= bc_dist :
65
64
log .error (
66
- f" The maximum allowed edit distance for barcode matching (={ args .max_distance } )"
67
- + f"is greater than the smallest detected edit distance between indices in samplesheet (={ min_distance } )"
68
- + ", which will result in ambiguous matches."
65
+ f" Edit distance of barcodes in samplesheet are less than the minimum specified { args .max_distance } >={ bc_dist } "
69
66
)
70
67
exit ()
71
- log .debug (f"Samplesheet bc_dist == { min_distance } " )
68
+ log .debug (f"Samplesheet bc_dist == { bc_dist } " )
72
69
if args .threads > MAX_PROCESSES :
73
70
log .warning (
74
71
f" Setting threads to { MAX_PROCESSES } as the maximum number of processes is { MAX_PROCESSES } "
75
72
)
76
73
args .threads = MAX_PROCESSES
77
74
78
- ## Sort the adaptors by type and size
79
-
80
- # Get a list of tuples with the adaptor name and ONT barcode
81
- adaptor_tuples : list [tuple [str , str ]] = [
82
- (entry .adaptor .name , entry .ont_barcode ) for entry in ss
83
- ]
84
-
85
- # Convert to set to enforce uniqueness
86
- adaptor_set : set [tuple [str , str ]] = set (adaptor_tuples )
87
-
88
- # Create a dictionary with the adaptors as keys and an empty list as value
89
- adaptors_sorted : dict [tuple [str , str ], list ] = dict ([(i , []) for i in adaptor_set ])
90
-
91
- # Populate the dictionary values with sample-specific information
92
- """
93
- adaptors_sorted = {
94
- ( adaptor_name, ont_barcode ) : [
95
- (sample_name, adaptor, fastq),
96
- (sample_name, adaptor, fastq),
97
- ...
98
- ],
99
- ...
100
- }
101
- """
75
+ # Sort the adaptors by type and size
76
+ adaptors_t = [(entry .adaptor .name , entry .ont_barcode ) for entry in ss ]
77
+ adaptor_set = set (adaptors_t )
78
+ adaptors_sorted = dict ([(i , []) for i in adaptor_set ])
102
79
for entry in ss :
103
80
adaptors_sorted [(entry .adaptor .name , entry .ont_barcode )].append (
104
81
(entry .sample_name , entry .adaptor , os .path .abspath (entry .fastq ))
@@ -114,19 +91,18 @@ def run_demux(args):
114
91
adaptor_name , ont_barcode = key
115
92
fastq_path = sample [0 ][2 ]
116
93
# If there are multiple ONT barcodes, we need to add the ONT barcode to the adaptor name
94
+ adaptor_bc_name = adaptor_name
117
95
if ont_barcode :
118
- adaptor_bc_name = f"{ adaptor_name } _{ ont_barcode } "
119
- else :
120
- adaptor_bc_name = adaptor_name
96
+ adaptor_bc_name = adaptor_name + "_" + ont_barcode
121
97
fastq_files = glob .glob (fastq_path )
122
98
123
99
# Align
124
- align_path = os .path .join (args .out_fastq , f"{ adaptor_bc_name } .paf" )
100
+ aln_path = os .path .join (args .out_fastq , f"{ adaptor_bc_name } .paf" )
125
101
adaptor_path = os .path .join (args .out_fastq , f"{ adaptor_name } .fasta" )
126
102
with open (adaptor_path , "w" ) as f :
127
103
f .write (ss .get_fastastring (adaptor_name ))
128
104
for fq in fastq_files :
129
- run_minimap2 (fq , adaptor_path , align_path , args .threads )
105
+ run_minimap2 (fq , adaptor_path , aln_path , args .threads )
130
106
131
107
# Easy line count in input fastq files
132
108
num_fq = 0
@@ -135,7 +111,7 @@ def run_demux(args):
135
111
for i in f :
136
112
num_fq += 1
137
113
num_fq = int (num_fq / 4 )
138
- paf_entries = parse_paf_lines (align_path )
114
+ paf_entries = parse_paf_lines (aln_path )
139
115
140
116
# Make stats
141
117
log .info (f" Searching for adaptor hits in { adaptor_bc_name } " )
@@ -277,8 +253,7 @@ def run_demux(args):
277
253
sample_dists = [
278
254
(
279
255
lev .distance (
280
- i [0 ],
281
- f"{ x .adaptor .i7 .index_seq } +{ x .adaptor .i5 .index_seq } " .lower (),
256
+ i [0 ], f"{ x .adaptor .i7_index } +{ x .adaptor .i5_index } " .lower ()
282
257
),
283
258
x .sample_name ,
284
259
)
0 commit comments