From ded6eb1c3cb739541cb1d47aa31ad3ae3c0692ad Mon Sep 17 00:00:00 2001 From: Emerson Knapp Date: Wed, 29 Apr 2026 12:10:50 -0700 Subject: [PATCH] Allow for some variation in the license header line Signed-off-by: Emerson Knapp --- polymath_code_standard/insert_license.py | 25 ++++ tests/test_insert_license_wildcard.py | 148 +++++++++++++++++++++++ 2 files changed, 173 insertions(+) diff --git a/polymath_code_standard/insert_license.py b/polymath_code_standard/insert_license.py index ebec572..38baf41 100644 --- a/polymath_code_standard/insert_license.py +++ b/polymath_code_standard/insert_license.py @@ -262,6 +262,10 @@ def process_files( except LicenseUpdateError as error: print(error) license_update_failed = True + elif args.wildcard_copyright_org and any_copyright_line_found( + src_file_content, args.detect_license_in_X_top_lines + ): + pass # existing attribution in a different format — leave the file untouched else: if fuzzy_match_header_index is not None: if fuzzy_license_found( @@ -437,17 +441,31 @@ def fuzzy_license_found( _YEARS_PATTERN = re.compile(r'\b\d{4}([ ,-]+\d{2,4})*\b') +_COPYRIGHT_C_PATTERN = re.compile(r'\(c\)\s*', re.IGNORECASE) +_YEAR_PRESENT_PATTERN = re.compile(r'(\b\d{4})-present\b', re.IGNORECASE) +_ALL_RIGHTS_RESERVED_PATTERN = re.compile(r'[.,]?\s*\ball rights reserved\.?\s*', re.IGNORECASE) def _strip_years(line): return _YEARS_PATTERN.sub('', line) +def _normalize_copyright_line(line: str) -> str: + """Strip cosmetic decorators from a copyright line for loose comparison.""" + line = _COPYRIGHT_C_PATTERN.sub('', line) + line = _YEAR_PRESENT_PATTERN.sub(r'\1', line) + line = _ALL_RIGHTS_RESERVED_PATTERN.sub('', line) + return ' '.join(line.split()) + + def _license_line_matches(license_line, src_file_line, match_years_strictly, wildcard_copyright_org=False): license_line = license_line.strip() src_file_line = src_file_line.strip() if wildcard_copyright_org and _is_copyright_line(license_line): return _is_copyright_line(src_file_line) + if _is_copyright_line(license_line): + license_line = _normalize_copyright_line(license_line) + src_file_line = _normalize_copyright_line(src_file_line) if match_years_strictly: return license_line == src_file_line return _strip_years(license_line) == _strip_years(src_file_line) @@ -482,6 +500,13 @@ def copyright_sentinel_found(src_file_content, top_lines_count): return False +def any_copyright_line_found(src_file_content, top_lines_count): + for i in range(min(top_lines_count, len(src_file_content))): + if _is_copyright_line(src_file_content[i].strip()): + return True + return False + + def skip_license_insert_found(src_file_content, skip_license_insertion_comment, top_lines_count): for i in range(top_lines_count): if i < len(src_file_content) and skip_license_insertion_comment in src_file_content[i]: diff --git a/tests/test_insert_license_wildcard.py b/tests/test_insert_license_wildcard.py index 09c490d..31fa07f 100644 --- a/tests/test_insert_license_wildcard.py +++ b/tests/test_insert_license_wildcard.py @@ -7,6 +7,8 @@ LicenseInfo, _is_copyright_line, _license_line_matches, + _normalize_copyright_line, + any_copyright_line_found, copyright_sentinel_found, find_license_header_index, main, @@ -80,6 +82,76 @@ def test_reuse_spdx_license_identifier_is_not_copyright(self): assert not _is_copyright_line('# SPDX-License-Identifier: Apache-2.0') +# --------------------------------------------------------------------------- +# _normalize_copyright_line +# --------------------------------------------------------------------------- + + +class TestNormalizeCopyrightLine: + def test_strips_c_symbol(self): + assert _normalize_copyright_line('# Copyright (c) 2026 Acme Corp') == '# Copyright 2026 Acme Corp' + + def test_strips_c_symbol_uppercase(self): + assert _normalize_copyright_line('# Copyright (C) 2026 Acme Corp') == '# Copyright 2026 Acme Corp' + + def test_strips_present_suffix(self): + assert _normalize_copyright_line('# Copyright 2025-present Acme Corp') == '# Copyright 2025 Acme Corp' + + def test_strips_all_rights_reserved(self): + assert ( + _normalize_copyright_line('# Copyright 2026 Acme Corp. All rights reserved') == '# Copyright 2026 Acme Corp' + ) + + def test_strips_all_combined(self): + result = _normalize_copyright_line('// Copyright (c) 2025-present Acme Corp. All rights reserved') + assert result == '// Copyright 2025 Acme Corp' + + def test_leaves_plain_line_unchanged(self): + assert _normalize_copyright_line('# Copyright 2026 Acme Corp') == '# Copyright 2026 Acme Corp' + + +# --------------------------------------------------------------------------- +# _license_line_matches — copyright decoration tolerance (no wildcard needed) +# --------------------------------------------------------------------------- + + +class TestLicenseLineMatchesDecoration: + def test_c_symbol_matches_plain_template(self): + assert _license_line_matches( + '// Copyright 2026 Acme Corp', + '// Copyright (c) 2026 Acme Corp', + match_years_strictly=False, + ) + + def test_present_suffix_matches_plain_template(self): + assert _license_line_matches( + '// Copyright 2026 Acme Corp', + '// Copyright 2025-present Acme Corp', + match_years_strictly=False, + ) + + def test_all_rights_reserved_matches_plain_template(self): + assert _license_line_matches( + '// Copyright 2026 Acme Corp', + '// Copyright 2026 Acme Corp. All rights reserved', + match_years_strictly=False, + ) + + def test_all_decorations_combined(self): + assert _license_line_matches( + '// Copyright 2026 Acme Corp', + '// Copyright (c) 2025-present Acme Corp. All rights reserved', + match_years_strictly=False, + ) + + def test_wrong_org_still_fails(self): + assert not _license_line_matches( + '// Copyright 2026 Acme Corp', + '// Copyright (c) 2025-present Other Corp. All rights reserved', + match_years_strictly=False, + ) + + # --------------------------------------------------------------------------- # _license_line_matches — wildcard_copyright_org # --------------------------------------------------------------------------- @@ -138,6 +210,33 @@ def test_sentinel_beyond_top_lines_not_detected(self): assert not copyright_sentinel_found(content, top_lines_count=2) +# --------------------------------------------------------------------------- +# any_copyright_line_found +# --------------------------------------------------------------------------- + + +class TestAnyCopyrightLineFound: + def test_finds_standard_copyright(self): + content = ['# Copyright (c) 2025-present Acme Corp. All rights reserved\n', 'import foo\n'] + assert any_copyright_line_found(content, top_lines_count=5) + + def test_finds_cpp_style(self): + content = ['// Copyright (c) 2025-present Acme Corp. All rights reserved\n', 'int x;\n'] + assert any_copyright_line_found(content, top_lines_count=5) + + def test_not_found_in_plain_code(self): + content = ['import foo\n', 'x = 1\n'] + assert not any_copyright_line_found(content, top_lines_count=5) + + def test_sentinel_line_also_counts(self): + content = [f'# Copyright 2026 {COPYRIGHT_ORG_SENTINEL}\n'] + assert any_copyright_line_found(content, top_lines_count=5) + + def test_beyond_top_lines_not_found(self): + content = ['import foo\n', 'import bar\n', '# Copyright 2026 Acme\n'] + assert not any_copyright_line_found(content, top_lines_count=2) + + # --------------------------------------------------------------------------- # find_license_header_index — wildcard matching # --------------------------------------------------------------------------- @@ -280,3 +379,52 @@ def test_without_wildcard_rejects_different_org(self, tmp_path): ]) assert ret == 1 assert src.read_text() != original + + def test_single_line_copyright_passes_with_wildcard(self, tmp_path): + # Existing repos often have a condensed copyright line without the full license block + content = '// Copyright (c) 2025-present Polymath Robotics, Inc. All rights reserved\nint x = 0;\n' + src = self._write(tmp_path, 'f.cpp', content) + lf = self._license_file(tmp_path) + ret = main([ + '--license-filepath', + lf, + '--comment-style', + '//', + '--allow-past-years', + '--wildcard-copyright-org', + str(src), + ]) + assert ret == 0 + assert src.read_text() == content # file untouched + + def test_single_line_copyright_with_sentinel_still_fails(self, tmp_path): + content = f'// Copyright (c) 2025-present {COPYRIGHT_ORG_SENTINEL}\nint x = 0;\n' + src = self._write(tmp_path, 'f.cpp', content) + lf = self._license_file(tmp_path) + ret = main([ + '--license-filepath', + lf, + '--comment-style', + '//', + '--allow-past-years', + '--wildcard-copyright-org', + str(src), + ]) + assert ret == 1 + assert src.read_text() == content # file still untouched (sentinel check doesn't modify) + + def test_no_copyright_at_all_inserts_sentinel(self, tmp_path): + content = 'int x = 0;\n' + src = self._write(tmp_path, 'f.cpp', content) + lf = self._license_file(tmp_path) + ret = main([ + '--license-filepath', + lf, + '--comment-style', + '//', + '--allow-past-years', + '--wildcard-copyright-org', + str(src), + ]) + assert ret == 1 + assert COPYRIGHT_ORG_SENTINEL in src.read_text()