From fe23e9a1e67730a0538f300eaea178dddd8c4004 Mon Sep 17 00:00:00 2001 From: Caleb Kaiser <42076840+caleb-kaiser@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:04:20 -0400 Subject: [PATCH] Update tree sitter for parsing wrapped functions + associated tests --- __tests__/extraction.test.ts | 132 ++++++++++++++++++++++++++ src/extraction/tree-sitter-helpers.ts | 101 +++++++++++++++++--- 2 files changed, 220 insertions(+), 13 deletions(-) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index aae1d9e62..0e37f3a7d 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -6681,3 +6681,135 @@ describe('Swift property wrappers / attributes (blast-radius recall)', () => { } finally { cleanupTempDir(dir); } }); }); + + + +describe('Docstring attachment for wrapped/decorated symbols', () => { + it('attaches a preceding comment to an exported function', () => { + const code = ` +// Charges the card +export function processPayment(amount) { return amount; } +`; + const fn = extractFromSource('pay.ts', code).nodes.find((n) => n.name === 'processPayment'); + expect(fn?.docstring).toBe('Charges the card'); + }); + + it('attaches a preceding comment to an exported class', () => { + const code = ` +/** The payment service */ +export class PaymentService {} +`; + const cls = extractFromSource('svc.ts', code).nodes.find((n) => n.name === 'PaymentService'); + expect(cls?.docstring).toBe('The payment service'); + }); + + it('attaches a preceding comment to an arrow export const', () => { + const code = ` +// adds two numbers +export const add = (a, b) => a + b; +`; + const fn = extractFromSource('add.ts', code).nodes.find((n) => n.name === 'add'); + expect(fn?.docstring).toBe('adds two numbers'); + }); + + it('attaches a preceding comment to an export default function', () => { + const code = ` +// default handler +export default function handler() {} +`; + const fn = extractFromSource('handler.ts', code).nodes.find((n) => n.name === 
'handler'); + expect(fn?.docstring).toBe('default handler'); + }); + + it('attaches a preceding comment to a decorated Python function', () => { + const code = ` +# does the thing +@app.route("/x") +def view(): + return 1 +`; + const fn = extractFromSource('views.py', code).nodes.find((n) => n.name === 'view'); + expect(fn?.docstring).toContain('does the thing'); + }); + + it('attaches a preceding comment to a decorated Python class', () => { + const code = ` +# a model +@dataclass +class Model: + pass +`; + const cls = extractFromSource('model.py', code).nodes.find((n) => n.name === 'Model'); + expect(cls?.docstring).toContain('a model'); + }); + + it('captures a comment on an annotated Java method via the base walk (not affected by the bug)', () => { + // In tree-sitter-java the `@Override` annotation is inside the method + // declaration, so the method's own previousNamedSibling is already the + // comment — the base walk captures it without any wrapper climb. Pinned as + // a control so the wrapper-climb scope-narrowing doesn't regress it. + const code = ` +public class C { + // handles requests + @Override + public void handle() {} +} +`; + const m = extractFromSource('C.java', code).nodes.find((n) => n.name === 'handle'); + expect(m?.docstring).toBe('handles requests'); + }); + + it('captures a comment on a C# attributed method via the base walk (not affected by the bug)', () => { + // Same as Java: the `[HttpGet]` attribute is inside the method declaration, + // so the base sibling-walk already finds the comment — no climb needed. 
+ const code = ` +public class C { + // the endpoint + [HttpGet] + public void Get() {} +} +`; + const m = extractFromSource('C.cs', code).nodes.find((n) => n.name === 'Get'); + expect(m?.docstring).toBe('the endpoint'); + }); + + it('does not change a non-wrapped symbol docstring (no regression)', () => { + const code = ` +// plain helper +function helper() {} +`; + const fn = extractFromSource('helper.ts', code).nodes.find((n) => n.name === 'helper'); + expect(fn?.docstring).toBe('plain helper'); + }); + + it('leaves a wrapped symbol with no preceding comment as undefined', () => { + const fn = extractFromSource('none.ts', `export const x = () => 1;`) + .nodes.find((n) => n.name === 'x'); + expect(fn?.docstring).toBeUndefined(); + }); + + it('does not mis-attach a comment across two sibling exports', () => { + // The comment precedes `second`, not `first` — it must attach only to + // `second`, and `first` must stay undefined. + const code = ` +export function first() {} +// doc for second +export function second() {} +`; + const nodes = extractFromSource('multi.ts', code).nodes; + expect(nodes.find((n) => n.name === 'first')?.docstring).toBeUndefined(); + expect(nodes.find((n) => n.name === 'second')?.docstring).toBe('doc for second'); + }); + + it('does not attach a comment to the second declarator of one declaration', () => { + // A single comment above `const a = ..., b = ...` belongs to the leading + // declarator only — `b` must not inherit it. 
+ const code = ` +// shared +export const a = () => 1, b = () => 2; +`; + const nodes = extractFromSource('decl.ts', code).nodes; + expect(nodes.find((n) => n.name === 'a')?.docstring).toBe('shared'); + expect(nodes.find((n) => n.name === 'b')?.docstring).toBeUndefined(); + }); +}); diff --git a/src/extraction/tree-sitter-helpers.ts b/src/extraction/tree-sitter-helpers.ts index 0f2dd1bfd..a62780b1f 100644 --- a/src/extraction/tree-sitter-helpers.ts +++ b/src/extraction/tree-sitter-helpers.ts @@ -44,27 +44,102 @@ export function getChildByField(node: SyntaxNode, fieldName: string): SyntaxNode } /** - * Get the docstring/comment preceding a node + * Return true when `node.type` is one of the comment node types handled here: `comment`, `line_comment`, `block_comment` or `documentation_comment`. */ -export function getPrecedingDocstring(node: SyntaxNode, source: string): string | undefined { +function isCommentNode(node: SyntaxNode): boolean { + return ( + node.type === 'comment' || + node.type === 'line_comment' || + node.type === 'block_comment' || + node.type === 'documentation_comment' + ); +} + +const DOCSTRING_WRAPPER_TYPES = new Set([ + // Parent node types that climbToWrapperWithComment may climb through. TS/JS: + 'export_statement', + 'lexical_declaration', + 'variable_declaration', + 'variable_declarator', + // Python decorators + 'decorated_definition', +]); + + +/** + * Collect the text of the contiguous run of comment nodes found by walking `previousNamedSibling` back from `node`, in source order. Returns null when the nearest previous named sibling is not a comment (or there is none). + */ +function collectPrecedingComments(node: SyntaxNode, source: string): string[] | null { let sibling = node.previousNamedSibling; const comments: string[] = []; - while (sibling) { - if ( - sibling.type === 'comment' || - sibling.type === 'line_comment' || - sibling.type === 'block_comment' || - sibling.type === 'documentation_comment' - ) { - comments.unshift(getNodeText(sibling, source)); - sibling = sibling.previousNamedSibling; - } else { + while (sibling && isCommentNode(sibling)) { + comments.unshift(getNodeText(sibling, source)); + sibling = sibling.previousNamedSibling; + } + + return comments.length > 0 ? 
comments : null; +} + +/** + * Climb from `node` toward the root through parents whose type is in DOCSTRING_WRAPPER_TYPES and return the first (nearest) wrapper whose previous named sibling is a comment. Returns null when the climb ends without finding one: the parent is not a wrapper type, or a named child other than a comment, decorator, identifier, property_identifier, type_annotation or type_identifier precedes the current node inside the wrapper. + */ +function climbToWrapperWithComment(node: SyntaxNode): SyntaxNode | null { + let current = node; + + while (current.parent && DOCSTRING_WRAPPER_TYPES.has(current.parent.type)) { + const parent = current.parent; + + let leading = true; // stays true only if every named child before `current` in `parent` is one of the skippable types below + for (let i = 0; i < parent.namedChildCount; i++) { + const child = parent.namedChild(i); + if (!child) continue; + if (child.id === current.id) break; + if ( + isCommentNode(child) || + child.type === 'decorator' || + child.type === 'identifier' || + child.type === 'property_identifier' || + child.type === 'type_annotation' || + child.type === 'type_identifier' + ) { + continue; + } + leading = false; break; } + if (!leading) break; + + // If the wrapper itself has a preceding comment sibling, the climb has reached the node that owns the comment. + const prev = parent.previousNamedSibling; + if (prev && isCommentNode(prev)) { + return parent; + } + + current = parent; + } + + // The climb ended without finding a wrapper preceded by a comment. + return null; +} + + + +/** + * Get the docstring/comment preceding a node + */ +export function getPrecedingDocstring(node: SyntaxNode, source: string): string | undefined { + let comments = collectPrecedingComments(node, source); + + if (comments === null) { + // No direct preceding comment — climb through wrapper/decorator parents. + const wrapper = climbToWrapperWithComment(node); + if (wrapper) { + comments = collectPrecedingComments(wrapper, source); + } } - if (comments.length === 0) return undefined; + if (comments === null || comments.length === 0) return undefined; // Clean up comment markers return comments