Skip to content

Commit 93452ec

Browse files
authored
Revert "Readability improvements"
1 parent 519f74b commit 93452ec

File tree

9 files changed

+239
-357
lines changed

9 files changed

+239
-357
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
"esbenp.prettier-vscode",
1919
"wholroyd.jinja",
2020
"ms-python.python",
21-
"charliermarsh.ruff",
21+
"charliermarsh.ruff@2024.2.0",
2222
"ms-azuretools.vscode-docker",
2323
],
2424
},

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,3 @@
66
.*_cache
77
node_modules
88
.vscode
9-
build

anglerfish/anglerfish.py

Lines changed: 30 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -28,19 +28,6 @@
2828

2929
MAX_PROCESSES = 64 # Ought to be enough for anybody
3030

31-
anglerfish_logo = r"""
32-
___
33-
( ) \ -..__
34-
_.|~”~~~”…_
35-
^´ `>.
36-
(+ (+ ) “<..<^(
37-
`´ ``´ ___ (
38-
\__..~ __( _…_(
39-
\ /
40-
“--…_ _..~%´
41-
```´´
42-
"""
43-
4431

4532
def run_demux(args):
4633
multiprocessing.set_start_method("spawn")
@@ -51,54 +38,44 @@ def run_demux(args):
5138
ss = SampleSheet(args.samplesheet, args.ont_barcodes)
5239
version = pkg_resources.get_distribution("bio-anglerfish").version
5340
report = Report(args.run_name, run_uuid, version)
54-
sys.stderr.write(anglerfish_logo)
41+
sys.stderr.write("""
42+
___
43+
( ) \ -..__
44+
_.|~”~~~”…_
45+
^´ `>.
46+
(+ (+ ) “<..<^(
47+
`´ ``´ ___ (
48+
\__..~ __( _…_(
49+
\ /
50+
“--…_ _..~%´
51+
```´´
52+
""")
5553
log.info(f" version {version}")
5654
log.info(f" arguments {vars(args)}")
5755
log.info(f" run uuid {run_uuid}")
58-
min_distance = ss.minimum_bc_distance()
56+
bc_dist = ss.minimum_bc_distance()
5957
if args.max_distance is None:
60-
# Default: Set the maximum distance for barcode matching to 0, 1 or 2
61-
# depending on the smallest detected edit distance between indices in the samplesheet
62-
args.max_distance = min(min_distance - 1, 2)
58+
if bc_dist > 1:
59+
args.max_distance = 2
60+
else:
61+
args.max_distance = 1
6362
log.info(f"Using maximum edit distance of {args.max_distance}")
64-
if args.max_distance >= min_distance:
63+
if args.max_distance >= bc_dist:
6564
log.error(
66-
f" The maximum allowed edit distance for barcode matching (={args.max_distance})"
67-
+ f"is greater than the smallest detected edit distance between indices in samplesheet (={min_distance})"
68-
+ ", which will result in ambiguous matches."
65+
f" Edit distance of barcodes in samplesheet are less than the minimum specified {args.max_distance}>={bc_dist}"
6966
)
7067
exit()
71-
log.debug(f"Samplesheet bc_dist == {min_distance}")
68+
log.debug(f"Samplesheet bc_dist == {bc_dist}")
7269
if args.threads > MAX_PROCESSES:
7370
log.warning(
7471
f" Setting threads to {MAX_PROCESSES} as the maximum number of processes is {MAX_PROCESSES}"
7572
)
7673
args.threads = MAX_PROCESSES
7774

78-
## Sort the adaptors by type and size
79-
80-
# Get a list of tuples with the adaptor name and ONT barcode
81-
adaptor_tuples: list[tuple[str, str]] = [
82-
(entry.adaptor.name, entry.ont_barcode) for entry in ss
83-
]
84-
85-
# Convert to set to enforce uniqueness
86-
adaptor_set: set[tuple[str, str]] = set(adaptor_tuples)
87-
88-
# Create a dictionary with the adaptors as keys and an empty list as value
89-
adaptors_sorted: dict[tuple[str, str], list] = dict([(i, []) for i in adaptor_set])
90-
91-
# Populate the dictionary values with sample-specific information
92-
"""
93-
adaptors_sorted = {
94-
( adaptor_name, ont_barcode ) : [
95-
(sample_name, adaptor, fastq),
96-
(sample_name, adaptor, fastq),
97-
...
98-
],
99-
...
100-
}
101-
"""
75+
# Sort the adaptors by type and size
76+
adaptors_t = [(entry.adaptor.name, entry.ont_barcode) for entry in ss]
77+
adaptor_set = set(adaptors_t)
78+
adaptors_sorted = dict([(i, []) for i in adaptor_set])
10279
for entry in ss:
10380
adaptors_sorted[(entry.adaptor.name, entry.ont_barcode)].append(
10481
(entry.sample_name, entry.adaptor, os.path.abspath(entry.fastq))
@@ -114,19 +91,18 @@ def run_demux(args):
11491
adaptor_name, ont_barcode = key
11592
fastq_path = sample[0][2]
11693
# If there are multiple ONT barcodes, we need to add the ONT barcode to the adaptor name
94+
adaptor_bc_name = adaptor_name
11795
if ont_barcode:
118-
adaptor_bc_name = f"{adaptor_name}_{ont_barcode}"
119-
else:
120-
adaptor_bc_name = adaptor_name
96+
adaptor_bc_name = adaptor_name + "_" + ont_barcode
12197
fastq_files = glob.glob(fastq_path)
12298

12399
# Align
124-
align_path = os.path.join(args.out_fastq, f"{adaptor_bc_name}.paf")
100+
aln_path = os.path.join(args.out_fastq, f"{adaptor_bc_name}.paf")
125101
adaptor_path = os.path.join(args.out_fastq, f"{adaptor_name}.fasta")
126102
with open(adaptor_path, "w") as f:
127103
f.write(ss.get_fastastring(adaptor_name))
128104
for fq in fastq_files:
129-
run_minimap2(fq, adaptor_path, align_path, args.threads)
105+
run_minimap2(fq, adaptor_path, aln_path, args.threads)
130106

131107
# Easy line count in input fastq files
132108
num_fq = 0
@@ -135,7 +111,7 @@ def run_demux(args):
135111
for i in f:
136112
num_fq += 1
137113
num_fq = int(num_fq / 4)
138-
paf_entries = parse_paf_lines(align_path)
114+
paf_entries = parse_paf_lines(aln_path)
139115

140116
# Make stats
141117
log.info(f" Searching for adaptor hits in {adaptor_bc_name}")
@@ -277,8 +253,7 @@ def run_demux(args):
277253
sample_dists = [
278254
(
279255
lev.distance(
280-
i[0],
281-
f"{x.adaptor.i7.index_seq}+{x.adaptor.i5.index_seq}".lower(),
256+
i[0], f"{x.adaptor.i7_index}+{x.adaptor.i5_index}".lower()
282257
),
283258
x.sample_name,
284259
)

anglerfish/cli.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -193,9 +193,7 @@ def run(
193193
typer.Option(
194194
"--max-distance",
195195
"-m",
196-
help="Manually set maximum allowed edit distance for index matching,"
197-
+ "by default this is set to 0, 1 or 2 based on the minimum detected"
198-
+ "index distance in the samplesheet.",
196+
help="Manually set maximum edit distance for BC matching, automatically set this is set to either 1 or 2",
199197
),
200198
] = 2,
201199
max_unknowns: Annotated[

0 commit comments

Comments
 (0)