Skip to content

Commit c26b40b

Browse files
authored
Support flexible years in licenses (#23) (#59)
1 parent 0e426c9 commit c26b40b

7 files changed

+190
-31
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ into separate repos:
5151
- src/license_header.txt # defaults to: LICENSE.txt
5252
- --comment-style
5353
- // # defaults to: #
54+
- --use-current-year
5455
```
5556
5657
### insert-license
@@ -84,6 +85,20 @@ In case you want to remove the comment headers introduced by
8485
3. Remove the `--remove-header` arg and update your `LICENSE.txt` ;
8586
4. Re-run the hook on all your files.
8687

88+
#### Handling years flexibly
89+
90+
You can add `--use-current-year` to change how the hook treats years in the
91+
headers:
92+
93+
- When inserting a header, the current year will always be inserted
94+
regardless of the year listed in the license file.
95+
- When modifying a file that already has a header, the hook will ensure the
96+
current year is listed in the header by using a range. For instance,
97+
`2015` or `2015-2018` would get updated to `2015-2023` in the year 2023.
98+
- When removing headers, the licenses will be removed regardless of the
99+
years they contain -- as if they used the year currently present in the
100+
license file.
101+
87102
#### Fuzzy license matching
88103

89104
In some cases your license files can contain several slightly different

pre_commit_hooks/insert_license.py

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55
import collections
66
import re
77
import sys
8+
from datetime import datetime
9+
from typing import Sequence
810

911
from fuzzywuzzy import fuzz
1012

1113
FUZZY_MATCH_TODO_COMMENT = (" TODO: This license is not consistent with"
1214
" license used in the project.")
1315
FUZZY_MATCH_TODO_INSTRUCTIONS = (
1416
" Delete the inconsistent license and above line"
15-
" and rerun pre-commit to insert a good license." )
17+
" and rerun pre-commit to insert a good license.")
1618
FUZZY_MATCH_EXTRA_LINES_TO_CHECK = 3
1719

1820
SKIP_LICENSE_INSERTION_COMMENT = "SKIP LICENSE INSERTION"
@@ -56,6 +58,11 @@ def main(argv=None):
5658
parser.add_argument('--insert-license-after-regex', default="",
5759
help="Insert license after line matching regex (ex: '^<\\?php$')")
5860
parser.add_argument('--remove-header', action='store_true')
61+
parser.add_argument(
62+
"--use-current-year",
63+
action="store_true",
64+
help=("Allow past years and ranges of years in headers. Use the current year in inserted and updated licenses."),
65+
)
5966
args = parser.parse_args(argv)
6067

6168
license_info = get_license_info(args)
@@ -78,6 +85,13 @@ def main(argv=None):
7885
return 0
7986

8087

88+
def _replace_year_in_license_with_current(plain_license: list[str]):
89+
current_year = datetime.now().year
90+
for i, line in enumerate(plain_license):
91+
plain_license[i] = re.sub(r"\b\d{4}\b", str(current_year), line)
92+
return plain_license
93+
94+
8195
def get_license_info(args) -> LicenseInfo:
8296
comment_start, comment_end = None, None
8397
comment_prefix = args.comment_style.replace('\\t', '\t')
@@ -86,6 +100,10 @@ def get_license_info(args) -> LicenseInfo:
86100
comment_start, comment_prefix, comment_end = comment_prefix.split('|')
87101
with open(args.license_filepath, encoding='utf8') as license_file:
88102
plain_license = license_file.readlines()
103+
104+
if args.use_current_year:
105+
plain_license = _replace_year_in_license_with_current(plain_license)
106+
89107
prefixed_license = [f'{comment_prefix}{extra_space if line.strip() else ""}{line}'
90108
for line in plain_license]
91109
eol = '\r\n' if prefixed_license[0][-2:] == '\r\n' else '\n'
@@ -138,7 +156,8 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo):
138156
license_header_index = find_license_header_index(
139157
src_file_content=src_file_content,
140158
license_info=license_info,
141-
top_lines_count=args.detect_license_in_X_top_lines)
159+
top_lines_count=args.detect_license_in_X_top_lines,
160+
match_years_strictly=not args.use_current_year)
142161
fuzzy_match_header_index = None
143162
if args.fuzzy_match_generates_todo and license_header_index is None:
144163
fuzzy_match_header_index = fuzzy_find_license_header_index(
@@ -150,6 +169,7 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo):
150169
)
151170
if license_header_index is not None:
152171
if license_found(remove_header=args.remove_header,
172+
update_year_range=args.use_current_year,
153173
license_header_index=license_header_index,
154174
license_info=license_info,
155175
src_file_content=src_file_content,
@@ -235,7 +255,53 @@ def license_not_found( # pylint: disable=too-many-arguments
235255
return False
236256

237257

238-
def license_found(remove_header, license_header_index, license_info, src_file_content, src_filepath, encoding): # pylint: disable=too-many-arguments
258+
# A four-digit year, then optionally a dash (with optional spaces before and
# after) and a two- to four-digit end year, surrounded by word boundaries.
_YEAR_RANGE_PATTERN = re.compile(r"\b\d{4}(?: *- *\d{2,4})?\b")


def try_update_year_range(
    src_file_content: list[str],
    license_header_index: int,
) -> tuple[Sequence[str], bool]:
    """
    Updates the years in a copyright header in src_file_content by
    ensuring it contains a range ending in the current year.
    Does nothing if the last year listed is already the current year
    (or later), whether it is a single year or the end of a range.
    Only the first line containing years, at or after license_header_index,
    is ever considered; lines after it are never modified.
    :param src_file_content: the lines in the source file; the affected
        line is replaced in place when an update happens
    :param license_header_index: line where the license starts
    :return: source file contents and a flag indicating update
    """
    current_year = datetime.now().year
    for i in range(license_header_index, len(src_file_content)):
        line = src_file_content[i]
        year_ranges = list(_YEAR_RANGE_PATTERN.finditer(line))
        if not year_ranges:
            continue

        last_range = year_ranges[-1]
        # Split on the dash rather than slicing at a fixed offset, so ranges
        # written with spaces around the dash ("2015 - 2016") parse correctly.
        start_text, _, end_text = last_range.group(0).partition("-")
        start_year = int(start_text)
        latest_year = int(end_text) if end_text.strip() else start_year
        if latest_year >= current_year:
            # Already up to date. Stop here instead of scanning further, so
            # year-like numbers in the rest of the file are never rewritten.
            break

        # Splice by position so only the matched occurrence is rewritten,
        # not every substring of the line that happens to equal it.
        updated = (line[:last_range.start()]
                   + str(start_year) + '-' + str(current_year)
                   + line[last_range.end():])
        # verify the current list of years ends in the current one
        # (findall yields the text of the pattern's last repeated group)
        if _YEARS_PATTERN.findall(updated)[-1][-4:] != str(current_year):
            print(f"Unable to update year range in line: {line.rstrip()}. Got: {updated.rstrip()}")
            break
        src_file_content[i] = updated
        return src_file_content, True
    return src_file_content, False
294+
295+
296+
def license_found(
297+
remove_header,
298+
update_year_range,
299+
license_header_index,
300+
license_info,
301+
src_file_content,
302+
src_filepath,
303+
encoding,
304+
): # pylint: disable=too-many-arguments
239305
"""
240306
Executed when license is found. Removes the license if remove_header is True;
240306
otherwise updates the year range if update_year_range is True, else does nothing.
@@ -246,6 +312,7 @@ def license_found(remove_header, license_header_index, license_info, src_file_co
246312
:param src_filepath: path of the src_file
247313
:return: True if change was made, False otherwise
248314
"""
315+
updated = False
249316
if remove_header:
250317
last_license_line_index = license_header_index + len(license_info.prefixed_license)
251318
if last_license_line_index < len(src_file_content) and src_file_content[last_license_line_index].strip():
@@ -255,10 +322,15 @@ def license_found(remove_header, license_header_index, license_info, src_file_co
255322
src_file_content = src_file_content[:license_header_index] + \
256323
src_file_content[license_header_index +
257324
len(license_info.prefixed_license) + 1:]
325+
updated = True
326+
elif update_year_range:
327+
src_file_content, updated = try_update_year_range(src_file_content, license_header_index)
328+
329+
if updated:
258330
with open(src_filepath, 'w', encoding=encoding) as src_file:
259331
src_file.write(''.join(src_file_content))
260-
return True
261-
return False
332+
333+
return updated
262334

263335

264336
def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments
@@ -289,17 +361,39 @@ def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments
289361
return True
290362

291363

364+
# More flexible than _YEAR_RANGE_PATTERN. For detecting all years in a line, not just a range.
365+
_YEARS_PATTERN = re.compile(r"\b\d{4}([ ,-]+\d{2,4})*\b")
366+
367+
368+
def _strip_years(line):
    """Return ``line`` with every year expression matched by _YEARS_PATTERN removed."""
    without_years = re.sub(_YEARS_PATTERN, "", line)
    return without_years
370+
371+
372+
def _license_line_matches(license_line, src_file_line, match_years_strictly):
    """
    Tell whether a source line corresponds to a line of the license.

    Surrounding whitespace is always ignored. Unless match_years_strictly is
    set, years are removed from both lines before they are compared.
    """
    expected, actual = license_line.strip(), src_file_line.strip()
    if not match_years_strictly:
        expected, actual = _strip_years(expected), _strip_years(actual)
    return expected == actual
380+
381+
292382
def find_license_header_index(src_file_content,
293-
license_info,
294-
top_lines_count):
383+
license_info: LicenseInfo,
384+
top_lines_count,
385+
match_years_strictly):
295386
"""
296387
Returns the line number, starting from 0 and lower than `top_lines_count`,
297388
where the license header comment starts in this file, or else None.
298389
"""
299390
for i in range(top_lines_count):
300391
license_match = True
301392
for j, license_line in enumerate(license_info.prefixed_license):
302-
if i + j >= len(src_file_content) or license_line.strip() != src_file_content[i + j].strip():
393+
if (i + j >= len(src_file_content) or
394+
not _license_line_matches(license_line,
395+
src_file_content[i + j],
396+
match_years_strictly)):
303397
license_match = False
304398
break
305399
if license_match:

tests/insert_license_test.py

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from contextlib import contextmanager
2+
from datetime import datetime
23
from itertools import product
34
import os
45
import shutil
@@ -48,6 +49,12 @@
4849
('module_without_license.py', '#', 'module_with_license_nospace.py', True, ['--no-space-in-comment-prefix']),
4950
('module_without_license.php', '/*| *| */', 'module_with_license.php', True, ['--insert-license-after-regex', '^<\\?php$']),
5051
('module_without_license.py', '#', 'module_with_license_noeol.py', True, ['--no-extra-eol']),
52+
53+
('module_without_license.groovy', '//', 'module_with_license.groovy', True, ['--use-current-year']),
54+
('module_with_stale_year_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']),
55+
('module_with_stale_year_range_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']),
56+
('module_with_badly_formatted_stale_year_range_in_license.py', '#', 'module_with_badly_formatted_stale_year_range_in_license.py', False,
57+
['--use-current-year']),
5158
),
5259
)),
5360
)
@@ -69,6 +76,8 @@ def test_insert_license(license_file_path,
6976
if new_src_file_expected:
7077
with open(new_src_file_expected, encoding=encoding) as expected_content_file:
7178
expected_content = expected_content_file.read()
79+
if '--use-current-year' in args:
80+
expected_content = expected_content.replace("2017", str(datetime.now().year))
7281
new_file_content = path.open(encoding=encoding).read()
7382
assert new_file_content == expected_content
7483

@@ -127,23 +136,29 @@ def test_fuzzy_match_license(license_file_path,
127136

128137

129138
@pytest.mark.parametrize(
130-
('src_file_content', 'expected_index'),
139+
('src_file_content', 'expected_index', 'match_years_strictly'),
131140
(
132-
(['foo\n', 'bar\n'], None),
133-
(['# License line 1\n', '# License line 2\n', '\n', 'foo\n', 'bar\n'], 0),
134-
(['\n', '# License line 1\n', '# License line 2\n', 'foo\n', 'bar\n'], 1),
141+
(['foo\n', 'bar\n'], None, True),
142+
(['# License line 1\n', '# Copyright 2017\n', '\n', 'foo\n', 'bar\n'], 0, True),
143+
(['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, True),
144+
(['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, False),
145+
(['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], None, True),
146+
(['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], 0, False),
147+
(['\n', '# License line 1\n', '# Copyright 2013,2015-2016\n', 'foo\n', 'bar\n'], 1, False),
135148
),
136149
)
137-
def test_is_license_present(src_file_content, expected_index):
150+
def test_is_license_present(src_file_content, expected_index, match_years_strictly):
138151
license_info = LicenseInfo(
139152
plain_license="",
140153
eol="\n",
141154
comment_start="",
142155
comment_prefix="#",
143156
comment_end="",
144157
num_extra_lines=0,
145-
prefixed_license=['# License line 1\n', '# License line 2\n'])
146-
assert expected_index == find_license_header_index(src_file_content, license_info, 5)
158+
prefixed_license=['# License line 1\n', '# Copyright 2017\n'])
159+
assert expected_index == find_license_header_index(
160+
src_file_content, license_info, 5, match_years_strictly=match_years_strictly
161+
)
147162

148163

149164
@pytest.mark.parametrize(
@@ -152,27 +167,35 @@ def test_is_license_present(src_file_content, expected_index):
152167
'comment_style',
153168
'fuzzy_match',
154169
'new_src_file_expected',
155-
'fail_check'),
170+
'fail_check',
171+
'use_current_year'),
156172
map(lambda a: a[:1] + a[1], product( # combine license files with other args
157173
('LICENSE_with_trailing_newline.txt', 'LICENSE_without_trailing_newline.txt'),
158174
(
159-
('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True),
175+
('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True, False),
160176
('module_with_license_and_few_words.css', '/*| *| */', False,
161-
'module_without_license_and_few_words.css', True),
162-
('module_with_license_todo.css', '/*| *| */', False, None, True),
163-
('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False),
164-
('module_without_license.css', '/*| *| */', False, None, False),
165-
166-
('module_with_license.py', '#', False, 'module_without_license.py', True),
167-
('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True),
168-
('init_with_license.py', '#', False, 'init_without_license.py', True),
169-
('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True),
177+
'module_without_license_and_few_words.css', True, False),
178+
('module_with_license_todo.css', '/*| *| */', False, None, True, False),
179+
('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False, False),
180+
('module_without_license.css', '/*| *| */', False, None, False, False),
181+
182+
('module_with_license.py', '#', False, 'module_without_license.py', True, False),
183+
('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True, False),
184+
('init_with_license.py', '#', False, 'init_without_license.py', True, False),
185+
('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True, False),
170186
# Fuzzy match
171-
('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True),
172-
('module_with_license_todo.css', '/*| *| */', True, None, True),
173-
('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True),
174-
('module_without_license.css', '/*| *| */', True, None, False),
175-
('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True),
187+
('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True, False),
188+
('module_with_license_todo.css', '/*| *| */', True, None, True, False),
189+
('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True, False),
190+
('module_without_license.css', '/*| *| */', True, None, False, False),
191+
('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True, False),
192+
# Strict and flexible years
193+
('module_with_stale_year_in_license.py', '#', False, None, False, False),
194+
('module_with_stale_year_range_in_license.py', '#', False, None, False, False),
195+
('module_with_license.py', '#', False, 'module_without_license.py', True, True),
196+
('module_with_stale_year_in_license.py', '#', False, 'module_without_license.py', True, True),
197+
('module_with_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True),
198+
('module_with_badly_formatted_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True),
176199
),
177200
)),
178201
)
@@ -182,6 +205,7 @@ def test_remove_license(license_file_path,
182205
fuzzy_match,
183206
new_src_file_expected,
184207
fail_check,
208+
use_current_year,
185209
tmpdir):
186210
with chdir_to_test_resources():
187211
path = tmpdir.join('src_file_path')
@@ -191,6 +215,8 @@ def test_remove_license(license_file_path,
191215
'--comment-style', comment_style]
192216
if fuzzy_match:
193217
argv = ['--fuzzy-match-generates-todo'] + argv
218+
if use_current_year:
219+
argv = ['--use-current-year'] + argv
194220
assert insert_license(argv) == (1 if fail_check else 0)
195221
if new_src_file_expected:
196222
with open(new_src_file_expected, encoding='utf-8') as expected_content_file:
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (C) 2015 -- 16 Teela O'Malley
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
import sys
6+
sys.stdout.write("FOO")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (C) 2015 Teela O'Malley
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
import sys
6+
sys.stdout.write("FOO")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (C) 2015-2016 Teela O'Malley
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
import sys
6+
sys.stdout.write("FOO")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (C) 2015-2017 Teela O'Malley
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
import sys
6+
sys.stdout.write("FOO")

0 commit comments

Comments
 (0)