From 2f7f8ae126b8ff5dfd4f5424f88b83e0af6c3abb Mon Sep 17 00:00:00 2001
From: komal mahale <komal.mahale@bti.bmwgroup.com>
Date: Fri, 29 May 2026 12:06:50 +0530
Subject: [PATCH 1/2] feat: add CodeQL GitHub Actions workflow

Run CodeQL via Bazel on all relevant C++ targets (//score/...)
on every push to main (and on demand via workflow_dispatch), then upload the
resulting SARIF file to GitHub Code Scanning via github/codeql-action/upload-sarif.

Without this upload step GitHub has no stored baseline, causing every
PR to incorrectly report no new alerts introduced.

Refs: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github
---
 .github/workflows/codeql.yml | 89 ++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 .github/workflows/codeql.yml

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 000000000..60a07c423
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,89 @@
+# *******************************************************************************
+# Copyright (c) 2026 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+
+# Workflow: CodeQL analysis on push to main and on demand.
+#
+# Runs CodeQL via Bazel on all relevant C++ targets under //score/...
+# and uploads the resulting SARIF file to GitHub Code Scanning so that:
+#   - GitHub tracks findings over time with a stable baseline on main.
+#   - New PRs are compared against that baseline to surface new findings.
+#
+# Why not run on pull_request?
+#   The full CodeQL analysis (169 MISRA rules over //score/...) exceeds
+#   GitHub's 6-hour runner limit and is cancelled before producing output.
+#   Running on push to main ensures a complete, uploadable baseline exists.
+#   GitHub Code Scanning then uses that baseline to flag new findings per PR.
+#
+# Reference: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github
+
+name: CodeQL Analysis
+
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:  # Allow maintainers to trigger manually when needed
+
+permissions:
+  contents: read
+  security-events: write  # Required to upload SARIF results to GitHub Code Scanning
+
+concurrency:
+  group: codeql-${{ github.run_id }}
+  cancel-in-progress: false  # Never cancel an in-progress CodeQL run; it takes hours
+
+env:
+  ANDROID_HOME: ""
+  ANDROID_SDK_ROOT: ""
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
+jobs:
+  codeql:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Free Disk Space (Ubuntu)
+        uses: eclipse-score/more-disk-space@v1
+        with:
+          level: 4
+
+      - name: Setup Bazel
+        uses: castler/setup-bazel@cache-optimized
+        with:
+          bazelisk-cache: true
+          disk-cache: "codeql"
+          repository-cache: true
+          cache-optimized: true
+          cache-save: ${{ github.ref == 'refs/heads/main' }}
+
+      - name: Allow linux-sandbox
+        uses: ./actions/unblock_user_namespace_for_linux_sandbox
+
+      - name: Run CodeQL via Bazel
+        run: |
+          bazel run //quality/static_analysis:codeql_lint -- \
+            --target //score/...
+
+      - name: Locate SARIF output
+        id: sarif
+        run: |
+          SARIF_PATH="$(bazel info output_path)/codeql.sarif"
+          echo "path=${SARIF_PATH}" >> "$GITHUB_OUTPUT"
+
+      - name: Upload SARIF to GitHub Code Scanning
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: ${{ steps.sarif.outputs.path }}
+          category: codeql-bazel

From ac3931bf0d72c2ee09262c1bcdae4760590c1ee0 Mon Sep 17 00:00:00 2001
From: komal mahale <komal.mahale@bti.bmwgroup.com>
Date: Wed, 10 Jun 2026 18:55:29 +0530
Subject: [PATCH 2/2] feat(codeql): split database generation and analysis

- separate create and analyze phases in codeql lint flow

- include review fixes and output handling improvements
---
 .github/workflows/codeql.yml           |  97 ++++++++++++++-------
 quality/quality.md                     |  38 +++++++-
 quality/static_analysis/codeql_lint.py | 116 ++++++++++++++++++-------
 3 files changed, 191 insertions(+), 60 deletions(-)

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 60a07c423..fa85b5630 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -11,26 +11,26 @@
 # SPDX-License-Identifier: Apache-2.0
 # *******************************************************************************
 
-# Workflow: CodeQL analysis on push to main and on demand.
+# Workflow: CodeQL analysis — split into database creation and analysis phases.
 #
-# Runs CodeQL via Bazel on all relevant C++ targets under //score/...
-# and uploads the resulting SARIF file to GitHub Code Scanning so that:
-#   - GitHub tracks findings over time with a stable baseline on main.
-#   - New PRs are compared against that baseline to surface new findings.
+# Phase 1 (create-codeql-database): Builds the codebase with CodeQL tracing
+#   and produces a reusable CodeQL database artifact.
 #
-# Why not run on pull_request?
-#   The full CodeQL analysis (169 MISRA rules over //score/...) exceeds
-#   GitHub's 6-hour runner limit and is cancelled before producing output.
-#   Running on push to main ensures a complete, uploadable baseline exists.
-#   GitHub Code Scanning then uses that baseline to flag new findings per PR.
+# Phase 2 (analyze-nightly): Downloads the database and runs CodeQL queries.
+#   - Triggers are the nightly schedule and workflow_dispatch only; a PR / push
+#     job running the incremental (quick) query set from config.yaml is not wired up.
+#   - Nightly is meant to run the full MISRA pack; TODO: pass --query-spec for that.
+#
+# This split avoids rebuilding the database for each analysis profile and
+# enables running different query sets for PR feedback vs nightly compliance.
 #
 # Reference: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/uploading-a-sarif-file-to-github
 
 name: CodeQL Analysis
 
 on:
-  push:
-    branches: [main]
+  schedule:
+    - cron: '0 2 * * *'  # Nightly at 2 AM UTC
   workflow_dispatch:  # Allow maintainers to trigger manually when needed
 
 permissions:
@@ -38,7 +38,7 @@ permissions:
   security-events: write  # Required to upload SARIF results to GitHub Code Scanning
 
 concurrency:
-  group: codeql-${{ github.run_id }}
+  group: codeql-${{ github.ref }}
   cancel-in-progress: false  # Never cancel an in-progress CodeQL run; it takes hours
 
 env:
@@ -47,7 +47,8 @@ env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 
 jobs:
-  codeql:
+  # ── Phase 1: Create CodeQL database ──────────────────────────────────────
+  create-codeql-database:
     runs-on: ubuntu-24.04
 
     steps:
@@ -61,29 +62,67 @@ jobs:
 
       - name: Setup Bazel
         uses: castler/setup-bazel@cache-optimized
-        with:
-          bazelisk-cache: true
-          disk-cache: "codeql"
-          repository-cache: true
-          cache-optimized: true
-          cache-save: ${{ github.ref == 'refs/heads/main' }}
 
       - name: Allow linux-sandbox
         uses: ./actions/unblock_user_namespace_for_linux_sandbox
 
-      - name: Run CodeQL via Bazel
+      - name: Create CodeQL database
         run: |
           bazel run //quality/static_analysis:codeql_lint -- \
-            --target //score/...
+            --phase create-database \
+            --database-path /var/tmp/codeql_databases/codeql_db \
+            --target //score/message_passing/... //score/mw/com/...
+
+      - name: Upload CodeQL database artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: codeql-database
+          path: /var/tmp/codeql_databases/codeql_db
+          retention-days: 1
+
+  # ── Phase 2: Full analysis (nightly) ─────────────────────────────────────
+  analyze-nightly:
+    needs: create-codeql-database
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Free Disk Space (Ubuntu)
+        uses: eclipse-score/more-disk-space@v1
+        with:
+          level: 4
 
-      - name: Locate SARIF output
-        id: sarif
+      - name: Setup Bazel
+        uses: castler/setup-bazel@cache-optimized
+
+      - name: Allow linux-sandbox
+        uses: ./actions/unblock_user_namespace_for_linux_sandbox
+
+      - name: Download CodeQL database
+        uses: actions/download-artifact@v4
+        with:
+          name: codeql-database
+          path: /var/tmp/codeql_databases/codeql_db
+
+      - name: Run CodeQL analysis (full — all MISRA rules)
         run: |
-          SARIF_PATH="$(bazel info output_path)/codeql.sarif"
-          echo "path=${SARIF_PATH}" >> "$GITHUB_OUTPUT"
+          bazel run //quality/static_analysis:codeql_lint -- \
+            --phase analyze-database \
+            --database-path /var/tmp/codeql_databases/codeql_db \
+            --output-dir /tmp/codeql-results \
+            --output-prefix codeql-nightly
 
       - name: Upload SARIF to GitHub Code Scanning
-        uses: github/codeql-action/upload-sarif@v3
+        uses: github/codeql-action/upload-sarif@v4
+        with:
+          sarif_file: /tmp/codeql-results/codeql-nightly.sarif
+          category: codeql-nightly
+
+      - name: Upload CSV results
+        uses: actions/upload-artifact@v4
         with:
-          sarif_file: ${{ steps.sarif.outputs.path }}
-          category: codeql-bazel
+          name: codeql-csv-results
+          path: /tmp/codeql-results/codeql-nightly.csv
+          retention-days: 30
diff --git a/quality/quality.md b/quality/quality.md
index 27e4ad326..4a28c334e 100644
--- a/quality/quality.md
+++ b/quality/quality.md
@@ -53,7 +53,12 @@ git add -p      # interactively stage hunks
 
 CodeQL performs MISRA C++ compliance checking using the `codeql/misra-cpp-coding-standards` query pack (version pinned in [`quality/static_analysis/config.yaml`](static_analysis/config.yaml)). The analysis builds a CodeQL database from the Bazel build and runs the configured queries against it.
 
-### Running CodeQL
+The script supports two reusable phases that can be run independently:
+
+1. **Database creation** — compiles the codebase with CodeQL tracing and produces a reusable database.
+2. **Analysis** — runs CodeQL queries against an existing database.
+
+### Running CodeQL (all-in-one)
 
 ```bash
 bazel run //quality/static_analysis:codeql_lint -- --target=//...
@@ -65,7 +70,36 @@ To analyze a specific target:
 bazel run //quality/static_analysis:codeql_lint -- --target=//score/message_passing/...
 ```
 
-The only user-facing option is `--target`, which specifies the Bazel target pattern to analyze. The `--codeql_path` and `--config_path` arguments are pre-configured by the build target.
+### Running CodeQL in phases
+
+Create the database once:
+
+```bash
+bazel run //quality/static_analysis:codeql_lint -- \
+  --phase create-database \
+  --database-path /var/tmp/codeql_databases/codeql_db \
+  --target //score/...
+```
+
+Run quick analysis (uses incremental queries from config.yaml):
+
+```bash
+bazel run //quality/static_analysis:codeql_lint -- \
+  --phase analyze-database \
+  --database-path /var/tmp/codeql_databases/codeql_db
+```
+
+Run full analysis with a specific query pack (e.g. for nightly):
+
+```bash
+bazel run //quality/static_analysis:codeql_lint -- \
+  --phase analyze-database \
+  --database-path /var/tmp/codeql_databases/codeql_db \
+  --query-spec "codeql/misra-cpp-coding-standards@2.52.0" \
+  --output-prefix codeql-nightly
+```
+
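+The `--phase` argument accepts `create-database`, `analyze-database`, or `all` (default, original behavior). The `--database-path` argument is required for the `create-database` and `analyze-database` phases. The `--query-spec` argument allows specifying a different query pack or suite for the analysis step. The `--output-prefix` argument controls the output file names, and `--output-dir` overrides the directory they are written to (default: `bazel info output_path`).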
 
 Results are written to the Bazel output directory (`bazel info output_path`):
 
diff --git a/quality/static_analysis/codeql_lint.py b/quality/static_analysis/codeql_lint.py
index 2903245d6..bc5dd32b5 100644
--- a/quality/static_analysis/codeql_lint.py
+++ b/quality/static_analysis/codeql_lint.py
@@ -21,6 +21,46 @@
 TMP_PATH_FOR_DATABASES = "/var/tmp/codeql_databases"
 
 
+def create_database(code_ql_path, config_path, target, source_root, database_path):
+    """Create the CodeQL database at database_path: init with tracing, run the traced Bazel build of target, finalize.
+
+    Every external command runs with check=True, so a failing step raises subprocess.CalledProcessError.
+    """
+    subprocess.run(
+        f"{code_ql_path} database init --begin-tracing --language=cpp --codescanning-config={config_path} --source-root={source_root} -- {database_path}",
+        shell=True, check=True)
+    # The init step above is expected to leave the environment needed by the traced build in this JSON file.
+    with open(os.path.join(database_path, "temp/tracingEnvironment/start-tracing.json")) as environment_description:
+        necessary_codeql_environment = json.load(environment_description)
+    env = _get_merged_environment(necessary_codeql_environment)
+    subprocess.run(f"bazel run @codeql_coding_standards//:process_coding_standards_config -- --working-dir={source_root}",
+                   shell=True, env=env, cwd=source_root, check=True)
+    # The timestamp-valued action_env differs on every run - presumably to defeat Bazel caching so each compile is traced.
+    bazel_command = f"bazel build --config=codeql --stamp --action_env=CODEQL_SEED_FORCE_RECOMPILE={datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}"
+    bazel_command += _get_action_env_extension(necessary_codeql_environment)
+    subprocess.run(f"{bazel_command} {target}", shell=True, env=env, cwd=source_root, check=True)
+    subprocess.run(f"{code_ql_path} database finalize -j=0 -- {database_path}", shell=True, check=True)
+
+
+def analyze_database(code_ql_path, database_path, source_root, query_spec=None, output_prefix="codeql", output_dir=None):
+    """Analyze an existing CodeQL database and write <output_prefix>.sarif and <output_prefix>.csv.
+
+    Results go to output_dir (created if missing) or, when omitted, to the Bazel output_path of source_root.
+    query_spec, if given, is appended to `codeql database analyze` as the query pack/suite to run.
+    A failing analysis raises subprocess.CalledProcessError instead of being silently ignored.
+    """
+    if output_dir:
+        output_base = output_dir
+        os.makedirs(output_base, exist_ok=True)
+    else:
+        output_base = _get_bazel_info(source_root).get('output_path')
+    query_arg = f" {query_spec}" if query_spec else ""
+    for report_format, extension in (("sarifv2.1.0", "sarif"), ("csv", "csv")):
+        subprocess.run(f"{code_ql_path} database analyze -j=0 {database_path}{query_arg} --format={report_format} --output={output_base}/{output_prefix}.{extension}", shell=True, check=True)
+    # @todo it is possible to generate here also a full MISRA compliance report, which we could do in the future.
+    # path/to/<output_database_name> <name-of-results-file>.sarif <output_directory>
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Run CodeQL linting operations"
@@ -34,42 +74,60 @@ def main():
     )
     parser.add_argument(
         "--target",
+        nargs="+",
+        help="Bazel target pattern(s) to build during tracing. Multiple targets can be supplied."
+    )
+    parser.add_argument(
+        "--phase",
+        choices=["create-database", "analyze-database", "all"],
+        default="all",
+        help="Phase to run: create-database, analyze-database, or all (default)"
+    )
+    parser.add_argument(
+        "--database-path",
+        help="Path to store/load the CodeQL database. "
+             "Required for create-database and analyze-database phases."
+    )
+    parser.add_argument(
+        "--query-spec",
+        help="Query pack/suite spec for codeql database analyze "
+             "(e.g. codeql/misra-cpp-coding-standards@2.52.0). "
+             "If omitted, uses defaults from codescanning config."
+    )
+    parser.add_argument(
+        "--output-prefix",
+        default="codeql",
+        help="Prefix for output file names (default: codeql)"
+    )
+    parser.add_argument(
+        "--output-dir",
+        help="Directory for output files. If omitted, uses bazel info output_path."
     )
 
     args = parser.parse_args()
     code_ql_path = args.codeql_path
     config_path = args.config_path
-    target = args.target
+    target = " ".join(args.target) if args.target else ""
     source_root = os.environ["BUILD_WORKING_DIRECTORY"]
 
-    os.makedirs(TMP_PATH_FOR_DATABASES, exist_ok=True)
-    with tempfile.TemporaryDirectory(dir=TMP_PATH_FOR_DATABASES) as database_location:
-        os.system(
-            f"{code_ql_path} database init --begin-tracing --language=cpp --codescanning-config={config_path} --source-root={source_root} -- {database_location}")
-
-        with open(os.path.join(database_location,
-                               "temp/tracingEnvironment/start-tracing.json")) as environment_description:
-            necessary_codeql_environment = json.load(environment_description)
-            env = _get_merged_environment(necessary_codeql_environment)
-
-            process_coding_standards_config = f"bazel run @codeql_coding_standards//:process_coding_standards_config"
-            subprocess.run(process_coding_standards_config + f" -- --working-dir={source_root}", shell=True, env=env,
-                           cwd=source_root, check=True)
-
-            bazel_command = f"bazel build --config=codeql --stamp --action_env=CODEQL_SEED_FORCE_RECOMPILE={datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}"
-            bazel_command += _get_action_env_extension(necessary_codeql_environment)
-            subprocess.run(f"{bazel_command} {target}", shell=True, env=env, cwd=source_root, check=True)
-
-            os.system(f"{code_ql_path} database finalize -j=0 -- {database_location}")
-
-            output_base = _get_bazel_info(source_root).get('output_path')
-            os.system(
-                f"{code_ql_path} database analyze -j=0 {database_location} --format=sarifv2.1.0 --output={output_base}/codeql.sarif")
-            os.system(
-                f"{code_ql_path} database analyze -j=0 {database_location} --format=csv --output={output_base}/codeql.csv")
-
-            # @todo it is possible to generate here also a full MISRA compliance report, which we could do in the future.
-            # path/to/<output_database_name> <name-of-results-file>.sarif <output_directory>
+    # argparse treats --database-path as optional, but the split phases cannot run without it: report a usage error
+    # here instead of failing later with a TypeError on None.
+    if args.phase != "all" and not args.database_path:
+        parser.error(f"--database-path is required for --phase {args.phase}")
+    if args.phase == "create-database":
+        # abspath keeps the parent non-empty for a bare relative name; os.makedirs("") would raise FileNotFoundError.
+        os.makedirs(os.path.dirname(os.path.abspath(args.database_path)), exist_ok=True)
+        create_database(code_ql_path, config_path, target, source_root, args.database_path)
+    elif args.phase == "analyze-database":
+        analyze_database(code_ql_path, args.database_path, source_root,
+                         query_spec=args.query_spec, output_prefix=args.output_prefix,
+                         output_dir=args.output_dir)
+    elif args.phase == "all":
+        # One-shot mode: the database lives in a temporary directory that is removed once analysis is done.
+        os.makedirs(TMP_PATH_FOR_DATABASES, exist_ok=True)
+        with tempfile.TemporaryDirectory(dir=TMP_PATH_FOR_DATABASES) as database_location:
+            create_database(code_ql_path, config_path, target, source_root, database_location)
+            analyze_database(code_ql_path, database_location, source_root, query_spec=args.query_spec, output_prefix=args.output_prefix, output_dir=args.output_dir)
 
 
 def _get_action_env_extension(necessary_codeql_environment):