Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions polymath_code_standard/insert_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,10 @@ def process_files(
except LicenseUpdateError as error:
print(error)
license_update_failed = True
elif args.wildcard_copyright_org and any_copyright_line_found(
src_file_content, args.detect_license_in_X_top_lines
):
pass # existing attribution in a different format — leave the file untouched
else:
if fuzzy_match_header_index is not None:
if fuzzy_license_found(
Expand Down Expand Up @@ -437,17 +441,31 @@ def fuzzy_license_found(


_YEARS_PATTERN = re.compile(r'\b\d{4}([ ,-]+\d{2,4})*\b')
_COPYRIGHT_C_PATTERN = re.compile(r'\(c\)\s*', re.IGNORECASE)
_YEAR_PRESENT_PATTERN = re.compile(r'(\b\d{4})-present\b', re.IGNORECASE)
_ALL_RIGHTS_RESERVED_PATTERN = re.compile(r'[.,]?\s*\ball rights reserved\.?\s*', re.IGNORECASE)


def _strip_years(line):
return _YEARS_PATTERN.sub('', line)


def _normalize_copyright_line(line: str) -> str:
"""Strip cosmetic decorators from a copyright line for loose comparison."""
line = _COPYRIGHT_C_PATTERN.sub('', line)
line = _YEAR_PRESENT_PATTERN.sub(r'\1', line)
line = _ALL_RIGHTS_RESERVED_PATTERN.sub('', line)
return ' '.join(line.split())


def _license_line_matches(license_line, src_file_line, match_years_strictly, wildcard_copyright_org=False):
    """Tell whether a source-file line corresponds to a license-template line.

    Both lines are stripped of surrounding whitespace first. When the template
    line is a copyright line:

    * with ``wildcard_copyright_org`` set, the source line matches as soon as
      it is itself a copyright line, whatever its text;
    * otherwise both lines are passed through ``_normalize_copyright_line``
      before being compared.

    The final comparison is exact when ``match_years_strictly`` is true and
    ignores years (via ``_strip_years``) when it is false.
    """
    expected = license_line.strip()
    actual = src_file_line.strip()

    if _is_copyright_line(expected):
        if wildcard_copyright_org:
            # Any copyright attribution is accepted in wildcard mode.
            return _is_copyright_line(actual)
        expected = _normalize_copyright_line(expected)
        actual = _normalize_copyright_line(actual)

    if not match_years_strictly:
        expected = _strip_years(expected)
        actual = _strip_years(actual)
    return expected == actual
Expand Down Expand Up @@ -482,6 +500,13 @@ def copyright_sentinel_found(src_file_content, top_lines_count):
return False


def any_copyright_line_found(src_file_content, top_lines_count):
    """Return True when one of the leading lines of the file is a copyright line.

    Only the first ``top_lines_count`` entries of ``src_file_content`` are
    inspected (fewer when the file is shorter). Each entry is stripped before
    being handed to ``_is_copyright_line``.
    """
    # Clamp at zero so a negative count inspects nothing instead of making the
    # slice count back from the end of the list.
    head = src_file_content[: max(top_lines_count, 0)]
    return any(_is_copyright_line(candidate.strip()) for candidate in head)


def skip_license_insert_found(src_file_content, skip_license_insertion_comment, top_lines_count):
for i in range(top_lines_count):
if i < len(src_file_content) and skip_license_insertion_comment in src_file_content[i]:
Expand Down
148 changes: 148 additions & 0 deletions tests/test_insert_license_wildcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
LicenseInfo,
_is_copyright_line,
_license_line_matches,
_normalize_copyright_line,
any_copyright_line_found,
copyright_sentinel_found,
find_license_header_index,
main,
Expand Down Expand Up @@ -80,6 +82,76 @@ def test_reuse_spdx_license_identifier_is_not_copyright(self):
assert not _is_copyright_line('# SPDX-License-Identifier: Apache-2.0')


# ---------------------------------------------------------------------------
# _normalize_copyright_line
# ---------------------------------------------------------------------------


class TestNormalizeCopyrightLine:
    """Checks that ``_normalize_copyright_line`` removes cosmetic decorations."""

    # The undecorated form most of the inputs below are expected to reduce to.
    PLAIN = '# Copyright 2026 Acme Corp'

    def test_strips_c_symbol(self):
        normalized = _normalize_copyright_line('# Copyright (c) 2026 Acme Corp')
        assert normalized == self.PLAIN

    def test_strips_c_symbol_uppercase(self):
        normalized = _normalize_copyright_line('# Copyright (C) 2026 Acme Corp')
        assert normalized == self.PLAIN

    def test_strips_present_suffix(self):
        normalized = _normalize_copyright_line('# Copyright 2025-present Acme Corp')
        assert normalized == '# Copyright 2025 Acme Corp'

    def test_strips_all_rights_reserved(self):
        normalized = _normalize_copyright_line('# Copyright 2026 Acme Corp. All rights reserved')
        assert normalized == self.PLAIN

    def test_strips_all_combined(self):
        decorated = '// Copyright (c) 2025-present Acme Corp. All rights reserved'
        assert _normalize_copyright_line(decorated) == '// Copyright 2025 Acme Corp'

    def test_leaves_plain_line_unchanged(self):
        assert _normalize_copyright_line(self.PLAIN) == self.PLAIN


# ---------------------------------------------------------------------------
# _license_line_matches — copyright decoration tolerance (no wildcard needed)
# ---------------------------------------------------------------------------


class TestLicenseLineMatchesDecoration:
    """Decorated copyright lines are compared against a plain template line.

    Every case calls ``_license_line_matches`` with ``match_years_strictly=False``
    and without the wildcard option.
    """

    TEMPLATE = '// Copyright 2026 Acme Corp'

    def _matches(self, src_line):
        """Compare *src_line* with the shared template, ignoring years."""
        return _license_line_matches(self.TEMPLATE, src_line, match_years_strictly=False)

    def test_c_symbol_matches_plain_template(self):
        assert self._matches('// Copyright (c) 2026 Acme Corp')

    def test_present_suffix_matches_plain_template(self):
        assert self._matches('// Copyright 2025-present Acme Corp')

    def test_all_rights_reserved_matches_plain_template(self):
        assert self._matches('// Copyright 2026 Acme Corp. All rights reserved')

    def test_all_decorations_combined(self):
        assert self._matches('// Copyright (c) 2025-present Acme Corp. All rights reserved')

    def test_wrong_org_still_fails(self):
        # Decoration tolerance must not hide a different organisation name.
        assert not self._matches('// Copyright (c) 2025-present Other Corp. All rights reserved')


# ---------------------------------------------------------------------------
# _license_line_matches — wildcard_copyright_org
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -138,6 +210,33 @@ def test_sentinel_beyond_top_lines_not_detected(self):
assert not copyright_sentinel_found(content, top_lines_count=2)


# ---------------------------------------------------------------------------
# any_copyright_line_found
# ---------------------------------------------------------------------------


class TestAnyCopyrightLineFound:
    """Exercises ``any_copyright_line_found`` on small in-memory files."""

    def test_finds_standard_copyright(self):
        lines = [
            '# Copyright (c) 2025-present Acme Corp. All rights reserved\n',
            'import foo\n',
        ]
        found = any_copyright_line_found(lines, top_lines_count=5)
        assert found

    def test_finds_cpp_style(self):
        lines = [
            '// Copyright (c) 2025-present Acme Corp. All rights reserved\n',
            'int x;\n',
        ]
        found = any_copyright_line_found(lines, top_lines_count=5)
        assert found

    def test_not_found_in_plain_code(self):
        lines = ['import foo\n', 'x = 1\n']
        found = any_copyright_line_found(lines, top_lines_count=5)
        assert not found

    def test_sentinel_line_also_counts(self):
        lines = [f'# Copyright 2026 {COPYRIGHT_ORG_SENTINEL}\n']
        found = any_copyright_line_found(lines, top_lines_count=5)
        assert found

    def test_beyond_top_lines_not_found(self):
        # The copyright sits on the third line, outside the two-line window.
        lines = ['import foo\n', 'import bar\n', '# Copyright 2026 Acme\n']
        found = any_copyright_line_found(lines, top_lines_count=2)
        assert not found


# ---------------------------------------------------------------------------
# find_license_header_index — wildcard matching
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -280,3 +379,52 @@ def test_without_wildcard_rejects_different_org(self, tmp_path):
])
assert ret == 1
assert src.read_text() != original

def test_single_line_copyright_passes_with_wildcard(self, tmp_path):
    """A condensed one-line copyright is accepted unchanged in wildcard mode."""
    # Existing repos often have a condensed copyright line without the full license block
    original_text = '// Copyright (c) 2025-present Polymath Robotics, Inc. All rights reserved\nint x = 0;\n'
    source_path = self._write(tmp_path, 'f.cpp', original_text)
    license_path = self._license_file(tmp_path)
    argv = [
        '--license-filepath',
        license_path,
        '--comment-style',
        '//',
        '--allow-past-years',
        '--wildcard-copyright-org',
        str(source_path),
    ]

    exit_code = main(argv)

    assert exit_code == 0
    # The file must be left untouched.
    assert source_path.read_text() == original_text

def test_single_line_copyright_with_sentinel_still_fails(self, tmp_path):
    """A one-line copyright that still carries the org sentinel is rejected."""
    original_text = f'// Copyright (c) 2025-present {COPYRIGHT_ORG_SENTINEL}\nint x = 0;\n'
    source_path = self._write(tmp_path, 'f.cpp', original_text)
    license_path = self._license_file(tmp_path)
    argv = [
        '--license-filepath',
        license_path,
        '--comment-style',
        '//',
        '--allow-past-years',
        '--wildcard-copyright-org',
        str(source_path),
    ]

    exit_code = main(argv)

    assert exit_code == 1
    # Still untouched: the sentinel check reports a failure without rewriting the file.
    assert source_path.read_text() == original_text

def test_no_copyright_at_all_inserts_sentinel(self, tmp_path):
    """A file with no copyright line gets a header containing the org sentinel."""
    original_text = 'int x = 0;\n'
    source_path = self._write(tmp_path, 'f.cpp', original_text)
    license_path = self._license_file(tmp_path)
    argv = [
        '--license-filepath',
        license_path,
        '--comment-style',
        '//',
        '--allow-past-years',
        '--wildcard-copyright-org',
        str(source_path),
    ]

    exit_code = main(argv)

    assert exit_code == 1
    assert COPYRIGHT_ORG_SENTINEL in source_path.read_text()