From 014c4ab273d0263aeb2de0032108af13a45f7ddb Mon Sep 17 00:00:00 2001
From: Tofik Hasanov <annexcies@gmail.com>
Date: Fri, 22 May 2026 15:20:54 -0400
Subject: [PATCH] fix(cloud-tests): render nested JSON in manual remediation
 steps correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Customer Simon reported that an IAM bucket policy embedded in an
auto-generated manual step was rendering with parts inside the code
block and parts as plain prose. Verified the cause in
RemediationDialog's `TextWithInlineCode`: the previous splitter used
a regex that only handles ONE level of brace nesting

    \{[^{}]*"(?:Version|Effect|Statement)"[^{}]*(?:\{[^{}]*\}[^{}]*)*\}

so a CloudTrail bucket policy — which has two-deep nesting via
`Statement[].Principal.{...}` AND `Statement[].Condition.StringEquals.
{...}` — matched only the first Statement object. The outer
`{"Version":...,"Statement":[` wrapper and the second Statement
escaped into prose around the code block.

Replaces the regex with a brace-balanced scan in a new helper
`extract-json-segments.ts`:

  - Walks the string counting braces, respecting string literals and
    escaped quotes so `"description":"} { inside"` doesn't confuse
    the depth counter.
  - Validates each candidate via `JSON.parse` before classifying it
    as a code block — invalid candidates fall through to text so we
    don't render garbage.
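  - Pure function, no React/DOM, easy to test in isolation.
  - No longer gated on the `"Version"`/`"Effect"`/`"Statement"`
    keywords: any valid JSON object or array embedded in a step
    (even a short one such as `[1]`) now renders as a code block.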

18 unit tests cover: arbitrary nesting, arrays of objects, multiple
JSONs in one step, escaped quotes, unbalanced braces, the customer-
reported CloudTrail-policy round-trip, and the no-JSON pass-through.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../components/RemediationDialog.tsx          |  30 +--
 .../components/extract-json-segments.test.ts  | 183 ++++++++++++++++++
 .../components/extract-json-segments.ts       | 128 ++++++++++++
 3 files changed, 328 insertions(+), 13 deletions(-)
 create mode 100644 apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.test.ts
 create mode 100644 apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.ts
diff --git a/apps/app/src/app/(app)/[orgId]/cloud-tests/components/RemediationDialog.tsx b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/RemediationDialog.tsx
index 62e28ab1f..f84754578 100644
--- a/apps/app/src/app/(app)/[orgId]/cloud-tests/components/RemediationDialog.tsx
+++ b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/RemediationDialog.tsx
@@ -15,6 +15,7 @@ import { useCallback, useEffect, useRef, useState } from 'react';
 import { toast } from 'sonner';
 import { startPreview, startSingleFix } from '../actions/single-fix';
 import { AcknowledgmentPanel } from './AcknowledgmentPanel';
+import { extractJsonSegments } from './extract-json-segments';
 import { PermissionErrorPanel } from './PermissionErrorPanel';
 
 interface PreviewProgress {
@@ -177,19 +178,22 @@ function TextSegment({ text }: { text: string }) {
 }
 
 function TextWithInlineCode({ text }: { text: string }) {
-  const jsonSplit = text.split(/(\{[^{}]*"(?:Version|Effect|Statement)"[^{}]*(?:\{[^{}]*\}[^{}]*)*\})/g);
-  const elements: React.ReactNode[] = [];
-  for (let i = 0; i < jsonSplit.length; i++) {
-    const segment = jsonSplit[i] ?? '';
-    if (segment.startsWith('{') && (segment.includes('"Version"') || segment.includes('"Effect"'))) {
-      try {
-        elements.push(<CodeBlock key={`json${i}`} code={JSON.stringify(JSON.parse(segment), null, 2)} />);
-      } catch { elements.push(<CodeBlock key={`json${i}`} code={segment} />); }
-    } else if (segment.trim()) {
-      elements.push(<TextSegment key={`seg${i}`} text={segment} />);
-    }
-  }
-  return <>{elements}</>;
+  // Brace-balanced scan via `extractJsonSegments`. Replaces the
+  // previous regex which only handled one level of nesting and
+  // mis-split policies with both Principal:{...} and
+  // Condition.StringEquals:{...}. See extract-json-segments.test.ts.
+  const segments = extractJsonSegments(text);
+  return (
+    <>
+      {segments.map((segment, i) => {
+        if (segment.type === 'json') {
+          return <CodeBlock key={`json${i}`} code={segment.pretty} />;
+        }
+        if (!segment.value.trim()) return null;
+        return <TextSegment key={`seg${i}`} text={segment.value} />;
+      })}
+    </>
+  );
 }
 
 function StepContent({ text }: { text: string }) {
diff --git a/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.test.ts b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.test.ts
new file mode 100644
index 000000000..c107ebd88
--- /dev/null
+++ b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.test.ts
@@ -0,0 +1,183 @@
+import { describe, expect, it } from 'vitest';
+import {
+  extractJsonSegments,
+  findBalancedEnd,
+} from './extract-json-segments';
+
+describe('findBalancedEnd', () => {
+  it('returns null when the position is not an opener', () => {
+    expect(findBalancedEnd('hello { world }', 0)).toBeNull();
+  });
+
+  it('finds the closing brace of a flat object', () => {
+    const t = 'pre {"a":1} post';
+    const start = t.indexOf('{');
+    const end = findBalancedEnd(t, start);
+    expect(end).not.toBeNull();
+    expect(t.slice(start, (end as number) + 1)).toBe('{"a":1}');
+  });
+
+  it('handles nested objects to arbitrary depth', () => {
+    const t = '{"a":{"b":{"c":{"d":1}}}}';
+    expect(findBalancedEnd(t, 0)).toBe(t.length - 1);
+  });
+
+  it('handles arrays containing objects', () => {
+    const t = '[{"a":1},{"b":2}]';
+    expect(findBalancedEnd(t, 0)).toBe(t.length - 1);
+  });
+
+  it('ignores braces that live inside string literals', () => {
+    const t = '{"key":"value with } and { inside"}';
+    expect(findBalancedEnd(t, 0)).toBe(t.length - 1);
+  });
+
+  it('handles escaped quotes inside string literals', () => {
+    const t = '{"key":"with \\"escaped\\" quotes"}';
+    expect(findBalancedEnd(t, 0)).toBe(t.length - 1);
+  });
+
+  it('returns null when braces never balance', () => {
+    expect(findBalancedEnd('{"unclosed":1', 0)).toBeNull();
+  });
+});
+
+describe('extractJsonSegments', () => {
+  it('returns the original string as a single text segment when there is no JSON', () => {
+    const result = extractJsonSegments('Open the AWS Console and create a trail.');
+    expect(result).toEqual([
+      { type: 'text', value: 'Open the AWS Console and create a trail.' },
+    ]);
+  });
+
+  it('splits prose + flat JSON object into ordered segments', () => {
+    const result = extractJsonSegments(
+      'Apply this policy: {"Version":"2012-10-17"} and verify.',
+    );
+    expect(result).toEqual([
+      { type: 'text', value: 'Apply this policy: ' },
+      {
+        type: 'json',
+        raw: '{"Version":"2012-10-17"}',
+        pretty: JSON.stringify({ Version: '2012-10-17' }, null, 2),
+      },
+      { type: 'text', value: ' and verify.' },
+    ]);
+  });
+
+  it('handles the customer-reported CloudTrail bucket policy (2 statements, Principal + Condition nested)', () => {
+    // Exact shape from Simon's screenshot. The previous regex-based
+    // splitter would have extracted only the first Statement object,
+    // leaving the outer wrapper and second Statement as plain text.
+    const policy = {
+      Version: '2012-10-17',
+      Statement: [
+        {
+          Sid: 'AWSCloudTrailAclCheck',
+          Effect: 'Allow',
+          Principal: { Service: 'cloudtrail.amazonaws.com' },
+          Action: 's3:GetBucketAcl',
+          Resource: 'arn:aws:s3:::BUCKETNAME',
+        },
+        {
+          Sid: 'AWSCloudTrailWrite',
+          Effect: 'Allow',
+          Principal: { Service: 'cloudtrail.amazonaws.com' },
+          Action: 's3:PutObject',
+          Resource: 'arn:aws:s3:::BUCKETNAME/AWSLogs/ACCOUNTID/*',
+          Condition: {
+            StringEquals: { 's3:x-amz-acl': 'bucket-owner-full-control' },
+          },
+        },
+      ],
+    };
+    const text = `In the S3 bucket you just created, go to the Permissions tab and add this bucket policy: ${JSON.stringify(policy)}`;
+
+    const result = extractJsonSegments(text);
+
+    expect(result).toHaveLength(2);
+    expect(result[0]).toEqual({
+      type: 'text',
+      value:
+        'In the S3 bucket you just created, go to the Permissions tab and add this bucket policy: ',
+    });
+    expect(result[1]?.type).toBe('json');
+    if (result[1]?.type === 'json') {
+      // Round-trip — the helper must extract the FULL policy, not a partial slice.
+      expect(JSON.parse(result[1].raw)).toEqual(policy);
+    }
+  });
+
+  it('extracts multiple JSON blocks in the same string', () => {
+    const text = 'first: {"a":1} then: {"b":2} done';
+    const result = extractJsonSegments(text);
+    expect(result.map((s) => s.type)).toEqual([
+      'text',
+      'json',
+      'text',
+      'json',
+      'text',
+    ]);
+  });
+
+  it('extracts JSON arrays as well as objects', () => {
+    const result = extractJsonSegments('See: [{"x":1},{"y":2}]');
+    expect(result).toHaveLength(2);
+    expect(result[1]?.type).toBe('json');
+  });
+
+  it('falls through to text when balanced braces are not valid JSON', () => {
+    // `{ not json }` has balanced braces but isn't parseable.
+    const result = extractJsonSegments('here: { not json } ok');
+    expect(result.every((s) => s.type === 'text')).toBe(true);
+    const joined = result
+      .map((s) => (s.type === 'text' ? s.value : ''))
+      .join('');
+    expect(joined).toBe('here: { not json } ok');
+  });
+
+  it('falls through to text when braces are unbalanced', () => {
+    const result = extractJsonSegments('broken: {"a":1 still text');
+    expect(result.every((s) => s.type === 'text')).toBe(true);
+  });
+
+  it('does NOT misclassify braces inside JSON string values', () => {
+    // The `}` inside the description must not terminate the JSON early.
+    const text =
+      'Apply: {"description":"contains } and { in text","key":"v"}';
+    const result = extractJsonSegments(text);
+    const jsonSegments = result.filter((s) => s.type === 'json');
+    expect(jsonSegments).toHaveLength(1);
+    if (jsonSegments[0]?.type === 'json') {
+      const parsed = JSON.parse(jsonSegments[0].raw);
+      expect(parsed.description).toBe('contains } and { in text');
+    }
+  });
+
+  it('handles a JSON block at the very start of the string', () => {
+    const result = extractJsonSegments('{"a":1} trailing');
+    expect(result).toEqual([
+      {
+        type: 'json',
+        raw: '{"a":1}',
+        pretty: JSON.stringify({ a: 1 }, null, 2),
+      },
+      { type: 'text', value: ' trailing' },
+    ]);
+  });
+
+  it('handles a JSON block at the very end of the string', () => {
+    const result = extractJsonSegments('leading {"a":1}');
+    expect(result[0]).toEqual({ type: 'text', value: 'leading ' });
+    expect(result[1]?.type).toBe('json');
+  });
+
+  it('pretty-prints with 2-space indentation', () => {
+    const result = extractJsonSegments('{"a":1,"b":[1,2]}');
+    expect(result[0]?.type).toBe('json');
+    if (result[0]?.type === 'json') {
+      expect(result[0].pretty).toBe(JSON.stringify({ a: 1, b: [1, 2] }, null, 2));
+      expect(result[0].pretty).toContain('\n');
+    }
+  });
+});
diff --git a/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.ts b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.ts
new file mode 100644
index 000000000..1efb4f138
--- /dev/null
+++ b/apps/app/src/app/(app)/[orgId]/cloud-tests/components/extract-json-segments.ts
@@ -0,0 +1,128 @@
+/**
+ * Split a freeform string into ordered TEXT and JSON segments.
+ *
+ * Background: the AI-generated manual remediation steps frequently
+ * embed IAM/bucket-policy JSON inside otherwise plain English. We
+ * render JSON as a code block and the surrounding prose as text. The
+ * previous implementation used a regex (`\{[^{}]*"(?:Version|Effect|
+ * Statement)"[^{}]*(?:\{[^{}]*\}[^{}]*)*\}`) which only handles ONE
+ * level of nesting — a bucket policy with both `Principal:{...}` and
+ * `Condition:{StringEquals:{...}}` would split incorrectly, leaving
+ * the outer wrapper and any deeper-nested statements as plain text.
+ *
+ * This helper does a proper brace-balanced scan instead:
+ *  - Locate `{` or `[` (a JSON candidate start).
+ *  - Walk forward counting braces, respecting string literals (so
+ *    `"value with } inside"` doesn't fool us).
+ *  - On balanced close, try `JSON.parse`. If valid, emit a json
+ *    segment; if not, keep scanning as text.
+ *
+ * Pure function. No DOM, no React — easy to unit-test.
+ */
+
+export type Segment =
+  | { type: 'text'; value: string }
+  | { type: 'json'; raw: string; pretty: string };
+
+const OPEN: Record<string, string> = { '{': '}', '[': ']' };
+
+/**
+ * Find the index of the bracket that closes the opener at `start`.
+ * Returns null if `start` is not `{`/`[` or depth never returns to 0.
+ * `}`/`]` are not type-matched; extractJsonSegments runs JSON.parse.
+ *
+ * Handles:
+ *  - nested objects + arrays at arbitrary depth
+ *  - string literals with `\"` escapes (so braces inside strings
+ *    don't affect depth counting)
+ */
+export function findBalancedEnd(
+  text: string,
+  start: number,
+): number | null {
+  const openCh = text[start];
+  if (openCh !== '{' && openCh !== '[') return null;
+
+  let depth = 0;
+  let inString = false;
+  let escaped = false;
+
+  for (let i = start; i < text.length; i++) {
+    const c = text[i];
+    if (escaped) {
+      escaped = false;
+      continue;
+    }
+    if (c === '\\') {
+      // Tracked outside strings too, so the char after any \ is skipped.
+      escaped = true;
+      continue;
+    }
+    if (c === '"') {
+      inString = !inString;
+      continue;
+    }
+    if (inString) continue;
+    if (c === '{' || c === '[') depth++;
+    else if (c === '}' || c === ']') {
+      depth--;
+      if (depth === 0) return i;
+      if (depth < 0) return null;
+    }
+  }
+  return null;
+}
+
+/**
+ * Walk a freeform string and return ordered segments. Text between
+ * JSON blocks is emitted verbatim; JSON blocks are validated via
+ * `JSON.parse` before being classified as such — invalid candidates
+ * fall through to text so we don't render garbage as a "code block".
+ *
+ * Top-level non-object/array values (numbers, strings, bare nulls)
+ * are intentionally left as text; they don't benefit from code-block
+ * formatting.
+ */
+export function extractJsonSegments(text: string): Segment[] {
+  const segments: Segment[] = [];
+  let buffer = '';
+  let i = 0;
+
+  while (i < text.length) {
+    const ch = text[i];
+    if (ch === '{' || ch === '[') {
+      const end = findBalancedEnd(text, i);
+      if (end !== null) {
+        const raw = text.slice(i, end + 1);
+        try {
+          const parsed: unknown = JSON.parse(raw);
+          if (
+            parsed !== null &&
+            (typeof parsed === 'object' || Array.isArray(parsed))
+          ) {
+            if (buffer.length > 0) {
+              segments.push({ type: 'text', value: buffer });
+              buffer = '';
+            }
+            segments.push({
+              type: 'json',
+              raw,
+              pretty: JSON.stringify(parsed, null, 2),
+            });
+            i = end + 1;
+            continue;
+          }
+        } catch {
+          // Not valid JSON despite balanced braces — treat as text.
+        }
+      }
+    }
+    buffer += ch;
+    i++;
+  }
+
+  if (buffer.length > 0) {
+    segments.push({ type: 'text', value: buffer });
+  }
+  return segments;
+}