diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a3dc137..8a406db 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -43,7 +43,8 @@ jobs:
           ninja-build ccache lld
           # Install LLVM and Clang 20
           wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-          sudo apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main"
+          UBUNTU_CODENAME=$(lsb_release -cs)
+          sudo apt-add-repository "deb http://apt.llvm.org/${UBUNTU_CODENAME}/ llvm-toolchain-${UBUNTU_CODENAME}-20 main"
           sudo apt-get update
           sudo apt-get install -y llvm-20 llvm-20-dev clang-20 libclang-20-dev
           echo "LLVM_DIR=/usr/lib/llvm-20/lib/cmake/llvm" >> $GITHUB_ENV
@@ -108,12 +109,28 @@ jobs:
         run: ccache -s

       # Tests
+      - name: Restore run_test cache
+        uses: actions/cache@v4
+        with:
+          path: .cache/run_test
+          key: run-test-${{ runner.os }}-${{ hashFiles('build/stack_usage_analyzer', 'test/**', 'run_test.py') }}
+          restore-keys: |
+            run-test-${{ runner.os }}-
+
+      - name: Restore compile_ir cache
+        uses: actions/cache@v4
+        with:
+          path: .cache/compile-ir
+          key: compile-ir-${{ runner.os }}-${{ hashFiles('test/**') }}
+          restore-keys: |
+            compile-ir-${{ runner.os }}-
+
       - name: Test Stack Usage Analyzer
         timeout-minutes: 45
         run: |
           TEST_JOBS="$(python3 -c 'import os; print(max(1, min(8, os.cpu_count() or 1)))')"
           echo "Running run_test.py with ${TEST_JOBS} job(s)"
-          EXTRA_ANALYZER_ARGS=""
+          EXTRA_ANALYZER_ARGS="--compile-ir-cache-dir=.cache/compile-ir"
           CORETRACE_RUN_TEST_EXTRA_ANALYZER_ARGS="${EXTRA_ANALYZER_ARGS}" \
             python3 -u run_test.py --jobs="${TEST_JOBS}"

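Note: the two actions/cache steps above key their caches on file contents via hashFiles(), so a cache entry is reused only while the analyzer binary, the test tree, and the harness are unchanged. A rough local analogue of that keying, as a sketch (cache_key and its pattern list are illustrative, not part of this PR; pathlib's "**" needs a trailing "/*" to match files, unlike hashFiles('test/**')):

import hashlib
from pathlib import Path

def cache_key(os_name: str, patterns: list[str]) -> str:
    # Hash path names plus file contents in sorted order, so the key is
    # deterministic and changes whenever any matched file changes.
    digest = hashlib.sha256()
    for pattern in patterns:
        for p in sorted(Path(".").glob(pattern)):
            if p.is_file():
                digest.update(str(p).encode())
                digest.update(p.read_bytes())
    return f"run-test-{os_name}-{digest.hexdigest()}"

# e.g. cache_key("Linux", ["build/stack_usage_analyzer", "test/**/*", "run_test.py"])
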
diff --git a/run_test.py b/run_test.py
index 580c149..be16554 100755
--- a/run_test.py
+++ b/run_test.py
@@ -38,6 +38,29 @@ class TestRunConfig:
 RUN_CONFIG = TestRunConfig()
 _CACHE_LOCK = threading.Lock()
 _MEM_CACHE = {}
+_FILE_HASH_CACHE = {}
+
+def _get_file_hash(p: Path) -> str:
+    path_str = str(p)
+    try:
+        st = p.stat()
+    except OSError:
+        return ""
+
+    # Use st_mtime_ns as a cache key for the hash
+    cache_key = (path_str, st.st_mtime_ns, st.st_size)
+    with _CACHE_LOCK:
+        if cache_key in _FILE_HASH_CACHE:
+            return _FILE_HASH_CACHE[cache_key]
+
+    try:
+        h = hashlib.sha256(p.read_bytes()).hexdigest()
+    except OSError:
+        h = ""
+
+    with _CACHE_LOCK:
+        _FILE_HASH_CACHE[cache_key] = h
+    return h
 # Set to True while the top-level parallel check phase is running.
 # Prevents nested ThreadPoolExecutor creation (N² process explosion).
 _PARALLEL_PHASE = False
@@ -170,11 +193,9 @@ def _collect_cache_dependencies(args):
             candidates.add(p.resolve())

     for p in sorted(candidates, key=lambda x: str(x)):
-        try:
-            st = p.stat()
-        except OSError:
-            continue
-        deps.append([str(p), st.st_mtime_ns, st.st_size])
+        h = _get_file_hash(p)
+        if h:
+            deps.append([str(p), h])

     return deps

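Note: the switch in _collect_cache_dependencies from (mtime_ns, size) tuples to content hashes is what makes the restored CI cache useful: a fresh checkout or an actions/cache restore resets mtimes, so mtime-keyed dependency lists would miss on every run even when nothing changed. A standalone demonstration of the difference (not part of run_test.py; this also assumes run_test.py already imports hashlib, which the hunk context does not show):

import hashlib
import os
import tempfile
from pathlib import Path

def content_key(p: Path) -> str:
    return hashlib.sha256(p.read_bytes()).hexdigest()

with tempfile.TemporaryDirectory() as d:
    f = Path(d) / "input.c"
    f.write_text("int main(void) { return 0; }\n")
    st = f.stat()
    before = (content_key(f), st.st_mtime_ns)

    # "Touch" the file: mtime moves one second forward, content does not change.
    os.utime(f, ns=(st.st_atime_ns, st.st_mtime_ns + 1_000_000_000))
    after = (content_key(f), f.stat().st_mtime_ns)

    assert before[0] == after[0]  # content key: still a cache hit
    assert before[1] != after[1]  # mtime key: spurious cache miss
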
diff --git a/scripts/ci/run_code_analysis.py b/scripts/ci/run_code_analysis.py
index 4ea7184..32c437e 100755
--- a/scripts/ci/run_code_analysis.py
+++ b/scripts/ci/run_code_analysis.py
@@ -7,6 +7,9 @@
 import json
 import subprocess
 import sys
+import os
+import math
+import concurrent.futures
 from pathlib import Path
 from typing import Iterable

@@ -380,38 +383,80 @@ def main() -> int:
     ensure_parent(Path(args.sarif_out))
     sarif_out_path = str(Path(args.sarif_out).resolve())

-    print(f"Running analyzer on {len(selected_inputs)} file(s).")
-    cmd = analyzer_cmd(
-        analyzer=analyzer,
-        inputs=selected_inputs,
-        fmt="json",
-        compdb_path=compdb_path,
-        base_dir=args.base_dir,
-        extra_args=args.analyzer_arg,
-        sarif_out=sarif_out_path,
-    )
-    run = subprocess.run(cmd, check=False, capture_output=True, text=True)
-    if run.returncode != 0:
-        if run.stdout:
-            sys.stdout.write(run.stdout)
-        if run.stderr:
-            sys.stderr.write(run.stderr)
-        return run.returncode
-
-    try:
-        payload = json.loads(run.stdout)
-    except json.JSONDecodeError as exc:
-        print(f"Analyzer returned invalid JSON: {exc}", file=sys.stderr)
+    jobs = int(os.environ.get("ANALYZER_JOBS", os.cpu_count() or 1))
+    chunk_size = max(1, math.ceil(len(selected_inputs) / jobs))
+    chunks = [selected_inputs[i:i + chunk_size] for i in range(0, len(selected_inputs), chunk_size)]
+
+    print(f"Running analyzer on {len(selected_inputs)} file(s) across {len(chunks)} job(s).")
+
+    def run_chunk(i, chunk):
+        chunk_sarif = f"{sarif_out_path}.chunk{i}" if sarif_out_path else None

+        cmd = analyzer_cmd(
+            analyzer=analyzer,
+            inputs=chunk,
+            fmt="json",
+            compdb_path=compdb_path,
+            base_dir=args.base_dir,
+            extra_args=args.analyzer_arg,
+            sarif_out=chunk_sarif,
+        )
+        run = subprocess.run(cmd, check=False, capture_output=True, text=True)
+        return i, run, chunk_sarif
+
+    diags = []
+    has_error = False
+    all_sarif_files = []
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
+        futures = [executor.submit(run_chunk, i, c) for i, c in enumerate(chunks)]
+        for fut in concurrent.futures.as_completed(futures):
+            i, run, chunk_sarif = fut.result()
+
+            if chunk_sarif and os.path.exists(chunk_sarif):
+                all_sarif_files.append(chunk_sarif)
+
+            if run.returncode != 0:
+                if run.stdout:
+                    sys.stdout.write(run.stdout)
+                if run.stderr:
+                    sys.stderr.write(run.stderr)
+                has_error = True
+            else:
+                try:
+                    payload = json.loads(run.stdout)
+                    d = payload.get("diagnostics", [])
+                    if isinstance(d, list):
+                        diags.extend(d)
+                except json.JSONDecodeError as exc:
+                    print(f"Analyzer returned invalid JSON: {exc}", file=sys.stderr)
+                    has_error = True
+
+    if has_error:
         return 2

+    if sarif_out_path and all_sarif_files:
+        merged = None
+        for p in all_sarif_files:
+            with open(p, 'r') as f:
+                try:
+                    data = json.load(f)
+                    if merged is None:
+                        merged = data
+                    else:
+                        if data.get("runs") and merged.get("runs"):
+                            merged["runs"][0].setdefault("results", []).extend(data["runs"][0].get("results", []))
+                except json.JSONDecodeError:
+                    pass
+            os.unlink(p)
+        if merged:
+            with open(sarif_out_path, 'w') as f:
+                json.dump(merged, f)
+
     if args.json_out:
         json_output_path = Path(args.json_out)
         ensure_parent(json_output_path)
-        json_output_path.write_text(run.stdout, encoding="utf-8")
-
-    diags = payload.get("diagnostics", [])
-    if not isinstance(diags, list):
-        diags = []
+        json_output_path.write_text(json.dumps({"diagnostics": diags}, indent=2), encoding="utf-8")

     errors = sum(1 for d in diags if sev(d) == "ERROR")
     warnings = sum(1 for d in diags if sev(d) == "WARNING")
diff --git a/test/alloca/wrong-alloca.c b/test/alloca/wrong-alloca.c
new file mode 100644
index 0000000..cb2fb9a
--- /dev/null
+++ b/test/alloca/wrong-alloca.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: Apache-2.0
+#include <stdint.h>
+#include <stddef.h>
+#include <alloca.h>
+
+int foo(uint8_t small_size)
+{
+    size_t size_allocation = (size_t)small_size * 1024;
+    char* buff = (char*)alloca(size_allocation);
+
+    if (!buff)
+        goto error;
+
+    return 0;
+
+error:
+
+    return 1;
+}
+
+// at line 9, column 25
+// [ !!Warn ] dynamic stack allocation detected for variable 'buff'
+// ↳ allocated type: i8
+// ↳ size of this allocation is not compile-time constant (VLA / variable alloca) and may lead to unbounded stack usage
+
+// at line 9, column 25
+// [ !!Warn ] user-controlled alloca size for variable 'buff'
+// ↳ allocation performed via alloca/VLA; stack usage grows with runtime value
+// ↳ size is unbounded at compile time
+// ↳ size depends on user-controlled input (function argument or non-local value)
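
Note: the trailing comments in wrong-alloca.c encode the diagnostics the analyzer is expected to emit at line 9, column 25 (the alloca call). A hypothetical parser for that comment format, for illustration only (parse_expectations is not from this PR; the real matching logic lives in run_test.py and is not shown in this diff):

import re
from pathlib import Path

_EXPECT_RE = re.compile(r"^// at line (\d+), column (\d+)$")

def parse_expectations(path: Path) -> list:
    # Group each "// at line N, column M" header with the "//" lines under it.
    expectations = []
    current = None
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        m = _EXPECT_RE.match(line)
        if m:
            current = {"line": int(m.group(1)), "column": int(m.group(2)), "messages": []}
            expectations.append(current)
        elif current is not None and line.startswith("//"):
            current["messages"].append(line[2:].strip())
        else:
            current = None  # a blank or code line ends the expectation block
    return expectations

# e.g. parse_expectations(Path("test/alloca/wrong-alloca.c")) yields two
# expectations at line 9, column 25, one per warning block above.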