Skip to content

Commit ec25bfe

Browse files
authored
Merge pull request #85 from kedhammar/ake-dev
Misc. improvements, unit tests and mypy proofing
2 parents 519f74b + 53a2718 commit ec25bfe

20 files changed

+765
-145
lines changed

.github/workflows/anglerfish.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: ubuntu-latest
88
strategy:
99
matrix:
10-
python-version: [3.10.2]
10+
python-version: [3.12]
1111

1212
steps:
1313
# Checkout code and install miniconda + environment
@@ -37,10 +37,10 @@ jobs:
3737
- shell: bash -l {0}
3838
name: Run anglerfish with test data
3939
run: |
40-
anglerfish run -s test/samples.csv
40+
anglerfish run -s testdata/samples.csv
4141
4242
# Run anglerfish explore
4343
- shell: bash -l {0}
4444
name: Run anglerfish explore
4545
run: |
46-
anglerfish explore -f test/BC18_P14351_1001.fastq.gz -o test/explore_output
46+
anglerfish explore -f testdata/BC18_P14351_1001.fastq.gz -o explore_output

.github/workflows/lint-code.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
- name: Set up Python
1717
uses: actions/setup-python@v4
1818
with:
19-
python-version: "3.10"
19+
python-version: "3.12"
2020
- name: Install dependencies
2121
run: |
2222
python -m pip install --upgrade pip
@@ -34,7 +34,7 @@ jobs:
3434
- name: Set up Python
3535
uses: actions/setup-python@v4
3636
with:
37-
python-version: "3.10"
37+
python-version: "3.12"
3838
- name: Install dependencies
3939
run: |
4040
python -m pip install --upgrade pip
@@ -51,17 +51,17 @@ jobs:
5151
- name: Set up Python
5252
uses: actions/setup-python@v4
5353
with:
54-
python-version: "3.10"
54+
python-version: "3.12"
5555
- name: Install dependencies
5656
run: |
5757
python -m pip install --upgrade pip
5858
pip install mypy
5959
# Start by installing type stubs
6060
- name: mypy --> Install stubs
61-
run: echo -e "y" | mypy --install-types **/*.py || exit 0
61+
run: echo -e "y" | mypy --install-types . || exit 0
6262
- name: mypy --> Static type checking
6363
# Configured in pyproject.toml
64-
run: mypy **/*.py
64+
run: mypy .
6565

6666
# Use Prettier to check various file formats
6767
prettier:

.github/workflows/pypi.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
- uses: actions/checkout@v4
1212
name: Check out source-code repository
1313

14-
- name: Set up Python 3.10
14+
- name: Set up Python 3.12
1515
uses: actions/setup-python@v4
1616
with:
17-
python-version: 3.10.10
17+
python-version: 3.12
1818

1919
- name: Install python dependencies
2020
run: |

.github/workflows/test-code.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: test-code
2+
on: [push, pull_request]
3+
4+
# Cancel if a newer run is started
5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
7+
cancel-in-progress: true
8+
9+
jobs:
10+
run_pytest:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
python-version: [3.12]
15+
16+
steps:
17+
# Check out code and install micromamba + environment
18+
- uses: actions/checkout@v4
19+
- uses: mamba-org/setup-micromamba@v1
20+
with:
21+
init-shell: bash
22+
create-args: >-
23+
python=${{ matrix.python-version }}
24+
pip
25+
environment-file: environment.yml
26+
environment-name: anglerfish-dev
27+
28+
# Install Anglerfish
29+
- shell: bash -l {0}
30+
name: Install Anglerfish
31+
run: |
32+
python -m pip install .
33+
34+
# Install Pytest
35+
- shell: bash -l {0}
36+
name: Install Pytest
37+
run: |
38+
python -m pip install pytest
39+
40+
# Run Pytest
41+
- shell: bash -l {0}
42+
name: Run Pytest
43+
run: |
44+
pytest .

.gitignore

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
*.egg-info
12
*.pyc
23
*~
3-
*.egg-info
4+
.*_cache
45
.DS_Store
56
.benchmarks
6-
.*_cache
7-
node_modules
7+
.ignoredir
88
.vscode
9+
__pycache__
910
build
11+
node_modules

anglerfish/anglerfish.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import pkg_resources
1515

16+
from .demux.adaptor import Adaptor
1617
from .demux.demux import (
1718
cluster_matches,
1819
layout_matches,
@@ -86,14 +87,16 @@ def run_demux(args):
8687
adaptor_set: set[tuple[str, str]] = set(adaptor_tuples)
8788

8889
# Create a dictionary with the adaptors as keys and an empty list as value
89-
adaptors_sorted: dict[tuple[str, str], list] = dict([(i, []) for i in adaptor_set])
90+
adaptors_sorted: dict[tuple[str, str], list[tuple[str, Adaptor, str]]] = dict(
91+
[(i, []) for i in adaptor_set]
92+
)
9093

9194
# Populate the dictionary values with sample-specific information
9295
"""
9396
adaptors_sorted = {
94-
( adaptor_name, ont_barcode ) : [
95-
(sample_name, adaptor, fastq),
96-
(sample_name, adaptor, fastq),
97+
( adaptor_name_str, ont_barcode_str ) : [
98+
(sample_name_str, Adaptor, fastq_str),
99+
(sample_name_str, Adaptor, fastq_str),
97100
...
98101
],
99102
...
@@ -168,7 +171,7 @@ def run_demux(args):
168171
**flips[args.force_rc],
169172
)
170173
flipped_i7, flipped_i5 = flips[args.force_rc].values()
171-
elif args.lenient: # Try reverse complementing the I5 and/or i7 indices and choose the best match
174+
elif args.lenient: # Try reverse complementing the i5 and/or i7 indices and choose the best match
172175
flipped = {}
173176
results = []
174177
pool = multiprocessing.Pool(

anglerfish/demux/adaptor.py

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,36 @@ class AdaptorPart:
6363
"""This class is used for the i5 or i7 adaptor."""
6464

6565
def __init__(self, sequence_token: str, name: str, index_seq: str | None):
66+
## Type declaration of attributes to be assigned upon instantiation
67+
# Attributes from arguments
68+
self.name: str
69+
self.sequence_token: str
70+
self.index_seq: str | None
71+
72+
# Index attributes
73+
self.has_index: bool
74+
self.len_index: int | None
75+
self.len_before_index: int | None
76+
self.len_after_index: int | None
77+
78+
# UMI attributes
79+
self.has_umi: bool
80+
self.len_umi: int | None
81+
self.len_umi_before_index: int | None
82+
self.len_umi_after_index: int | None
83+
84+
# Length attributes
85+
self.len_total: int | None
86+
self.len_constant: int
87+
88+
# Instantiation outsourced to private method
89+
self._setup(sequence_token, name, index_seq)
90+
91+
def _setup(self, sequence_token: str, name: str, index_seq: str | None):
6692
# Assign attributes from args
67-
self.sequence_token: str = sequence_token
68-
self.name: str = name
69-
self.index_seq: str | None = index_seq
93+
self.sequence_token = sequence_token
94+
self.name = name
95+
self.index_seq = index_seq
7096

7197
# Index bool and len
7298
if has_match(INDEX_TOKEN, self.sequence_token):
@@ -76,6 +102,10 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
76102
self.len_index = len(index_seq) if index_seq else None
77103

78104
else:
105+
if self.index_seq is not None:
106+
raise UserWarning(
107+
"Index sequence specified, but no index token found in adaptor sequence."
108+
)
79109
self.has_index = False
80110
self.len_index = 0
81111

@@ -87,21 +117,13 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
87117
)
88118
elif len(umi_tokens) == 1:
89119
self.has_umi = True
90-
self.len_umi = int(
91-
re.search(UMI_LENGTH_TOKEN, self.sequence_token).group(1)
92-
)
120+
umi_token_search = re.search(UMI_LENGTH_TOKEN, self.sequence_token)
121+
assert isinstance(umi_token_search, re.Match)
122+
self.len_umi = int(umi_token_search.group(1))
93123
else:
94124
self.has_umi = False
95125
self.len_umi = 0
96126

97-
# Type declaration of attributes to be assigned
98-
self.len_before_index: int | None
99-
self.len_after_index: int | None
100-
self.len_umi_before_index: int | None
101-
self.len_umi_after_index: int | None
102-
self.len_total: int | None
103-
self.len_constant: int
104-
105127
# Lengths
106128
if self.has_index and self.has_umi:
107129
# Index and UMI
@@ -149,7 +171,12 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
149171
self.len_before_index = None
150172
self.len_after_index = None
151173

152-
self.len_total = len(self.get_mask(insert_Ns=True)) if self.index_seq else None
174+
if (
175+
self.has_index is True and self.index_seq is not None
176+
) or self.has_index is False:
177+
self.len_total = len(self.get_mask(insert_Ns=True))
178+
else:
179+
self.len_total = None
153180
self.len_constant = len(self.get_mask(insert_Ns=False))
154181

155182
def get_mask(self, insert_Ns: bool = True) -> str:
@@ -165,11 +192,12 @@ def get_mask(self, insert_Ns: bool = True) -> str:
165192
else 0
166193
)
167194

168-
umi_mask_length = (
169-
max(self.len_umi_after_index, self.len_umi_before_index)
170-
if insert_Ns and self.has_umi
171-
else 0
172-
)
195+
if insert_Ns and self.has_umi:
196+
assert self.len_umi_before_index is not None
197+
assert self.len_umi_after_index is not None
198+
umi_mask_length = max(self.len_umi_after_index, self.len_umi_before_index)
199+
else:
200+
umi_mask_length = 0
173201

174202
# Test if the index is specified in the adaptor sequence when it shouldn't be
175203
if (
@@ -189,7 +217,7 @@ def get_mask(self, insert_Ns: bool = True) -> str:
189217
return self.sequence_token
190218

191219

192-
def has_match(pattern: re.Pattern, query: str) -> bool:
220+
def has_match(pattern: re.Pattern | str, query: str) -> bool:
193221
"""General function to check if a string contains a pattern."""
194222
match = re.search(pattern, query)
195223
if match is None:
@@ -209,6 +237,8 @@ def validate_adaptors(adaptors_dict: dict):
209237
f"Adaptor {adaptor_name} has an invalid sequence for {i}: {sequence_token}. Does not conform to the pattern {VALID_SEQUENCE_TOKEN_PATTERN}."
210238
)
211239

240+
return True
241+
212242

213243
def load_adaptors(raw: bool = False) -> list[Adaptor] | dict:
214244
"""Fetch all adaptors.
@@ -226,7 +256,7 @@ def load_adaptors(raw: bool = False) -> list[Adaptor] | dict:
226256
adaptors_dict = yaml.safe_load(f)
227257

228258
# Validate input
229-
validate_adaptors(adaptors_dict)
259+
assert validate_adaptors(adaptors_dict) is True
230260

231261
# Optionally, return raw dict
232262
if raw:

0 commit comments

Comments
 (0)