From 2f7f8ae126b8ff5dfd4f5424f88b83e0af6c3abb Mon Sep 17 00:00:00 2001 From: komal mahale Date: Fri, 29 May 2026 12:06:50 +0530 Subject: [PATCH 1/2] feat: add CodeQL GitHub Actions workflow Run CodeQL via Bazel on all relevant C++ targets (//score/...) on every pull_request and push to main, then upload the resulting SARIF file to GitHub Code Scanning via github/codeql-action/upload-sarif. Without this upload step GitHub has no stored baseline, causing every PR to incorrectly report no new alerts introduced. Refs: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github --- .github/workflows/codeql.yml | 89 ++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..60a07c423 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,89 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# Workflow: CodeQL analysis on push to main and on demand. +# +# Runs CodeQL via Bazel on all relevant C++ targets under //score/... +# and uploads the resulting SARIF file to GitHub Code Scanning so that: +# - GitHub tracks findings over time with a stable baseline on main. +# - New PRs are compared against that baseline to surface new findings. +# +# Why not run on pull_request? 
+# The full CodeQL analysis (169 MISRA rules over //score/...) exceeds +# GitHub's 6-hour runner limit and is cancelled before producing output. +# Running on push to main ensures a complete, uploadable baseline exists. +# GitHub Code Scanning then uses that baseline to flag new findings per PR. +# +# Reference: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github + +name: CodeQL Analysis + +on: + push: + branches: [main] + workflow_dispatch: # Allow maintainers to trigger manually when needed + +permissions: + contents: read + security-events: write # Required to upload SARIF results to GitHub Code Scanning + +concurrency: + group: codeql-${{ github.run_id }} + cancel-in-progress: false # Never cancel an in-progress CodeQL run; it takes hours + +env: + ANDROID_HOME: "" + ANDROID_SDK_ROOT: "" + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + +jobs: + codeql: + runs-on: ubuntu-24.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Free Disk Space (Ubuntu) + uses: eclipse-score/more-disk-space@v1 + with: + level: 4 + + - name: Setup Bazel + uses: castler/setup-bazel@cache-optimized + with: + bazelisk-cache: true + disk-cache: "codeql" + repository-cache: true + cache-optimized: true + cache-save: ${{ github.ref == 'refs/heads/main' }} + + - name: Allow linux-sandbox + uses: ./actions/unblock_user_namespace_for_linux_sandbox + + - name: Run CodeQL via Bazel + run: | + bazel run //quality/static_analysis:codeql_lint -- \ + --target //score/... 
+ + - name: Locate SARIF output + id: sarif + run: | + SARIF_PATH="$(bazel info output_path)/codeql.sarif" + echo "path=${SARIF_PATH}" >> "$GITHUB_OUTPUT" + + - name: Upload SARIF to GitHub Code Scanning + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: ${{ steps.sarif.outputs.path }} + category: codeql-bazel From ac3931bf0d72c2ee09262c1bcdae4760590c1ee0 Mon Sep 17 00:00:00 2001 From: komal mahale Date: Wed, 10 Jun 2026 18:55:29 +0530 Subject: [PATCH 2/2] feat(codeql): split database generation and analysis - separate create and analyze phases in codeql lint flow - include review fixes and output handling improvements --- .github/workflows/codeql.yml | 97 ++++++++++++++------- quality/quality.md | 38 +++++++- quality/static_analysis/codeql_lint.py | 116 ++++++++++++++++++------- 3 files changed, 191 insertions(+), 60 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 60a07c423..fa85b5630 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -11,26 +11,26 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -# Workflow: CodeQL analysis on push to main and on demand. +# Workflow: CodeQL analysis — split into database creation and analysis phases. # -# Runs CodeQL via Bazel on all relevant C++ targets under //score/... -# and uploads the resulting SARIF file to GitHub Code Scanning so that: -# - GitHub tracks findings over time with a stable baseline on main. -# - New PRs are compared against that baseline to surface new findings. +# Phase 1 (create-codeql-database): Builds the codebase with CodeQL tracing +# and produces a reusable CodeQL database artifact. # -# Why not run on pull_request? -# The full CodeQL analysis (169 MISRA rules over //score/...) exceeds -# GitHub's 6-hour runner limit and is cancelled before producing output. -# Running on push to main ensures a complete, uploadable baseline exists. 
-# GitHub Code Scanning then uses that baseline to flag new findings per PR. +# Phase 2 (analysis): Downloads the database and runs CodeQL queries. +# - PR / push to main: runs the incremental (quick) query set defined in +# config.yaml, which excludes queries tagged "exclude-from-incremental". +# - Nightly (schedule): runs the full MISRA pack including slow queries. +# +# This split avoids rebuilding the database for each analysis profile and +# enables running different query sets for PR feedback vs nightly compliance. # # Reference: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github name: CodeQL Analysis on: - push: - branches: [main] + schedule: + - cron: '0 2 * * *' # Nightly at 2 AM UTC workflow_dispatch: # Allow maintainers to trigger manually when needed permissions: @@ -38,7 +38,7 @@ permissions: security-events: write # Required to upload SARIF results to GitHub Code Scanning concurrency: - group: codeql-${{ github.run_id }} + group: codeql-${{ github.ref }} cancel-in-progress: false # Never cancel an in-progress CodeQL run; it takes hours env: @@ -47,7 +47,8 @@ env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true jobs: - codeql: + # ── Phase 1: Create CodeQL database ────────────────────────────────────── + create-codeql-database: runs-on: ubuntu-24.04 steps: @@ -61,29 +62,67 @@ jobs: - name: Setup Bazel uses: castler/setup-bazel@cache-optimized - with: - bazelisk-cache: true - disk-cache: "codeql" - repository-cache: true - cache-optimized: true - cache-save: ${{ github.ref == 'refs/heads/main' }} - name: Allow linux-sandbox uses: ./actions/unblock_user_namespace_for_linux_sandbox - - name: Run CodeQL via Bazel + - name: Create CodeQL database run: | bazel run //quality/static_analysis:codeql_lint -- \ - --target //score/... + --phase create-database \ + --database-path /var/tmp/codeql_databases/codeql_db \ + --target //score/message_passing/... //score/mw/com/... 
+ + - name: Upload CodeQL database artifact + uses: actions/upload-artifact@v4 + with: + name: codeql-database + path: /var/tmp/codeql_databases/codeql_db + retention-days: 1 + + # ── Phase 2: Full analysis (nightly) ───────────────────────────────────── + analyze-nightly: + needs: create-codeql-database + runs-on: ubuntu-24.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Free Disk Space (Ubuntu) + uses: eclipse-score/more-disk-space@v1 + with: + level: 4 - - name: Locate SARIF output - id: sarif + - name: Setup Bazel + uses: castler/setup-bazel@cache-optimized + + - name: Allow linux-sandbox + uses: ./actions/unblock_user_namespace_for_linux_sandbox + + - name: Download CodeQL database + uses: actions/download-artifact@v4 + with: + name: codeql-database + path: /var/tmp/codeql_databases/codeql_db + + - name: Run CodeQL analysis (full — all MISRA rules) run: | - SARIF_PATH="$(bazel info output_path)/codeql.sarif" - echo "path=${SARIF_PATH}" >> "$GITHUB_OUTPUT" + bazel run //quality/static_analysis:codeql_lint -- \ + --phase analyze-database \ + --database-path /var/tmp/codeql_databases/codeql_db \ + --output-dir /tmp/codeql-results \ + --output-prefix codeql-nightly - name: Upload SARIF to GitHub Code Scanning - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: /tmp/codeql-results/codeql-nightly.sarif + category: codeql-nightly + + - name: Upload CSV results + uses: actions/upload-artifact@v4 with: - sarif_file: ${{ steps.sarif.outputs.path }} - category: codeql-bazel + name: codeql-csv-results + path: /tmp/codeql-results/codeql-nightly.csv + retention-days: 30 diff --git a/quality/quality.md b/quality/quality.md index 27e4ad326..4a28c334e 100644 --- a/quality/quality.md +++ b/quality/quality.md @@ -53,7 +53,12 @@ git add -p # interactively stage hunks CodeQL performs MISRA C++ compliance checking using the `codeql/misra-cpp-coding-standards` query pack (version pinned 
in [`quality/static_analysis/config.yaml`](static_analysis/config.yaml)). The analysis builds a CodeQL database from the Bazel build and runs the configured queries against it. -### Running CodeQL +The script supports two reusable phases that can be run independently: + +1. **Database creation** — compiles the codebase with CodeQL tracing and produces a reusable database. +2. **Analysis** — runs CodeQL queries against an existing database. + +### Running CodeQL (all-in-one) ```bash bazel run //quality/static_analysis:codeql_lint -- --target=//... @@ -65,7 +70,36 @@ To analyze a specific target: bazel run //quality/static_analysis:codeql_lint -- --target=//score/message_passing/... ``` -The only user-facing option is `--target`, which specifies the Bazel target pattern to analyze. The `--codeql_path` and `--config_path` arguments are pre-configured by the build target. +### Running CodeQL in phases + +Create the database once: + +```bash +bazel run //quality/static_analysis:codeql_lint -- \ + --phase create-database \ + --database-path /var/tmp/codeql_databases/codeql_db \ + --target //score/... +``` + +Run quick analysis (uses incremental queries from config.yaml): + +```bash +bazel run //quality/static_analysis:codeql_lint -- \ + --phase analyze-database \ + --database-path /var/tmp/codeql_databases/codeql_db +``` + +Run full analysis with a specific query pack (e.g. for nightly): + +```bash +bazel run //quality/static_analysis:codeql_lint -- \ + --phase analyze-database \ + --database-path /var/tmp/codeql_databases/codeql_db \ + --query-spec "codeql/misra-cpp-coding-standards@2.52.0" \ + --output-prefix codeql-nightly +``` + +The `--phase` argument accepts `create-database`, `analyze-database`, or `all` (default, original behavior). The `--query-spec` argument allows specifying a different query pack or suite for the analysis step. The `--output-prefix` argument controls the output file names. 
Results are written to the Bazel output directory (`bazel info output_path`):

diff --git a/quality/static_analysis/codeql_lint.py b/quality/static_analysis/codeql_lint.py
index 2903245d6..bc5dd32b5 100644
--- a/quality/static_analysis/codeql_lint.py
+++ b/quality/static_analysis/codeql_lint.py
@@ -21,6 +21,71 @@
 TMP_PATH_FOR_DATABASES = "/var/tmp/codeql_databases"
 
 
+def create_database(code_ql_path, config_path, target, source_root, database_path):
+    """Create the CodeQL database: init, build with tracing, finalize.
+
+    Every CodeQL and Bazel step runs with ``check=True``: a failing step raises
+    ``subprocess.CalledProcessError`` instead of being silently ignored and
+    leaving a half-built database for the analysis phase.
+
+    Args:
+        code_ql_path: CodeQL CLI executable to invoke.
+        config_path: File passed to ``codeql database init --codescanning-config``.
+        target: Space-separated Bazel target pattern(s) built under tracing.
+        source_root: Passed as ``--source-root``; also the cwd of the Bazel calls.
+        database_path: Directory in which the database is created.
+    """
+    subprocess.run(
+        f"{code_ql_path} database init --begin-tracing --language=cpp --codescanning-config={config_path} --source-root={source_root} -- {database_path}",
+        shell=True, check=True)
+
+    with open(os.path.join(database_path,
+                           "temp/tracingEnvironment/start-tracing.json")) as environment_description:
+        necessary_codeql_environment = json.load(environment_description)
+    env = _get_merged_environment(necessary_codeql_environment)
+
+    process_coding_standards_config = f"bazel run @codeql_coding_standards//:process_coding_standards_config"
+    subprocess.run(process_coding_standards_config + f" -- --working-dir={source_root}", shell=True, env=env,
+                   cwd=source_root, check=True)
+
+    # NOTE(review): the per-run timestamp presumably defeats Bazel's action cache so that every
+    # compiler call is re-run and seen by the tracer -- confirm against the codeql Bazel config.
+    bazel_command = f"bazel build --config=codeql --stamp --action_env=CODEQL_SEED_FORCE_RECOMPILE={datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}"
+    bazel_command += _get_action_env_extension(necessary_codeql_environment)
+    subprocess.run(f"{bazel_command} {target}", shell=True, env=env, cwd=source_root, check=True)
+
+    subprocess.run(f"{code_ql_path} database finalize -j=0 -- {database_path}", shell=True, check=True)
+
+
+def analyze_database(code_ql_path, database_path, source_root, query_spec=None, output_prefix="codeql", output_dir=None):
+    """Run CodeQL analysis on an existing database.
+
+    Writes ``<output_prefix>.sarif`` and ``<output_prefix>.csv`` to ``output_dir``
+    (created if missing) or, when ``output_dir`` is not given, to the Bazel
+    ``output_path`` reported for ``source_root``. ``query_spec`` is appended to the
+    ``codeql database analyze`` command line when it is set.
+
+    Raises:
+        subprocess.CalledProcessError: if either analysis run fails, so callers
+            never go on to publish a missing or stale result file.
+    """
+    if output_dir:
+        output_base = output_dir
+        os.makedirs(output_base, exist_ok=True)
+    else:
+        output_base = _get_bazel_info(source_root).get('output_path')
+
+    query_arg = f" {query_spec}" if query_spec else ""
+
+    for report_format, extension in (("sarifv2.1.0", "sarif"), ("csv", "csv")):
+        subprocess.run(
+            f"{code_ql_path} database analyze -j=0 {database_path}{query_arg} --format={report_format} --output={output_base}/{output_prefix}.{extension}",
+            shell=True, check=True)
+
+    # @todo it is possible to generate here also a full MISRA compliance report, which we could do in the future.
+    # path/to/ .sarif
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Run CodeQL linting operations"
@@ -34,42 +99,68 @@ def main():
     )
     parser.add_argument(
         "--target",
+        nargs="+",
+        help="Bazel target pattern(s) to build during tracing. Multiple targets can be supplied."
+    )
+    parser.add_argument(
+        "--phase",
+        choices=["create-database", "analyze-database", "all"],
+        default="all",
+        help="Phase to run: create-database, analyze-database, or all (default)"
+    )
+    parser.add_argument(
+        "--database-path",
+        help="Path to store/load the CodeQL database. "
+             "Required for create-database and analyze-database phases."
+    )
+    parser.add_argument(
+        "--query-spec",
+        help="Query pack/suite spec for codeql database analyze "
+             "(e.g. codeql/misra-cpp-coding-standards@2.52.0). "
+             "If omitted, uses defaults from codescanning config."
+    )
+    parser.add_argument(
+        "--output-prefix",
+        default="codeql",
+        help="Prefix for output file names (default: codeql)"
+    )
+    parser.add_argument(
+        "--output-dir",
+        help="Directory for output files. If omitted, uses bazel info output_path."
     )
     args = parser.parse_args()
 
     code_ql_path = args.codeql_path
     config_path = args.config_path
-    target = args.target
+    target = " ".join(args.target) if args.target else ""
     source_root = os.environ["BUILD_WORKING_DIRECTORY"]
 
-    os.makedirs(TMP_PATH_FOR_DATABASES, exist_ok=True)
-    with tempfile.TemporaryDirectory(dir=TMP_PATH_FOR_DATABASES) as database_location:
-        os.system(
-            f"{code_ql_path} database init --begin-tracing --language=cpp --codescanning-config={config_path} --source-root={source_root} -- {database_location}")
-
-        with open(os.path.join(database_location,
-                               "temp/tracingEnvironment/start-tracing.json")) as environment_description:
-            necessary_codeql_environment = json.load(environment_description)
-        env = _get_merged_environment(necessary_codeql_environment)
-
-        process_coding_standards_config = f"bazel run @codeql_coding_standards//:process_coding_standards_config"
-        subprocess.run(process_coding_standards_config + f" -- --working-dir={source_root}", shell=True, env=env,
-                       cwd=source_root, check=True)
-
-        bazel_command = f"bazel build --config=codeql --stamp --action_env=CODEQL_SEED_FORCE_RECOMPILE={datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}"
-        bazel_command += _get_action_env_extension(necessary_codeql_environment)
-        subprocess.run(f"{bazel_command} {target}", shell=True, env=env, cwd=source_root, check=True)
-
-        os.system(f"{code_ql_path} database finalize -j=0 -- {database_location}")
-
-        output_base = _get_bazel_info(source_root).get('output_path')
-        os.system(
-            f"{code_ql_path} database analyze -j=0 {database_location} --format=sarifv2.1.0 --output={output_base}/codeql.sarif")
-        os.system(
-            f"{code_ql_path} database analyze -j=0 {database_location} --format=csv --output={output_base}/codeql.csv")
-
-        # @todo it is possible to generate here also a full MISRA compliance report, which we could do in the future.
-        # path/to/ .sarif
+    # Reject incomplete invocations up front with a usage error instead of failing
+    # later on a None path or on a bazel build without targets.
+    if args.phase in ("create-database", "analyze-database") and not args.database_path:
+        parser.error(f"--database-path is required for --phase {args.phase}")
+    if args.phase in ("create-database", "all") and not target:
+        parser.error(f"--target is required for --phase {args.phase}")
+
+    if args.phase == "create-database":
+        database_path = args.database_path
+        # abspath first: dirname() of a bare relative name is "" and os.makedirs("") raises.
+        os.makedirs(os.path.dirname(os.path.abspath(database_path)), exist_ok=True)
+        create_database(code_ql_path, config_path, target, source_root, database_path)
+
+    elif args.phase == "analyze-database":
+        database_path = args.database_path
+        analyze_database(code_ql_path, database_path, source_root,
+                         query_spec=args.query_spec, output_prefix=args.output_prefix,
+                         output_dir=args.output_dir)
+
+    elif args.phase == "all":
+        os.makedirs(TMP_PATH_FOR_DATABASES, exist_ok=True)
+        with tempfile.TemporaryDirectory(dir=TMP_PATH_FOR_DATABASES) as database_location:
+            create_database(code_ql_path, config_path, target, source_root, database_location)
+            analyze_database(code_ql_path, database_location, source_root,
+                             query_spec=args.query_spec, output_prefix=args.output_prefix,
+                             output_dir=args.output_dir)
 
 
 def _get_action_env_extension(necessary_codeql_environment):