From 5e7314d5ca6409acd8545c2e631ea6985e6fe4cb Mon Sep 17 00:00:00 2001 From: emrberk Date: Wed, 22 Apr 2026 16:35:09 +0300 Subject: [PATCH] fix: add cap to path discovery, add window clause and trailing comma support --- package.json | 2 +- src/autocomplete/budgeted-content-assist.ts | 329 ++++++++++++++++++++ src/autocomplete/content-assist.ts | 12 +- src/autocomplete/suggestion-builder.ts | 33 +- src/parser/ast.ts | 24 ++ src/parser/cst-types.d.ts | 40 +++ src/parser/parser.ts | 102 +++++- src/parser/toSql.ts | 28 +- src/parser/visitor.ts | 58 ++++ tests/autocomplete-perf.test.ts | 97 ++++++ tests/autocomplete.test.ts | 322 +++++++++++++++++++ tests/fixtures/docs-queries.json | 24 ++ tests/parser.test.ts | 176 +++++++++++ tests/recovery.test.ts | 8 +- yarn.lock | 4 +- 15 files changed, 1231 insertions(+), 28 deletions(-) create mode 100644 src/autocomplete/budgeted-content-assist.ts create mode 100644 tests/autocomplete-perf.test.ts diff --git a/package.json b/package.json index ddb3729..226d4ba 100644 --- a/package.json +++ b/package.json @@ -68,7 +68,7 @@ }, "license": "Apache-2.0", "dependencies": { - "chevrotain": "^11.1.1" + "chevrotain": "11.1.1" }, "devDependencies": { "@chevrotain/cst-dts-gen": "^11.1.1", diff --git a/src/autocomplete/budgeted-content-assist.ts b/src/autocomplete/budgeted-content-assist.ts new file mode 100644 index 0000000..225cd45 --- /dev/null +++ b/src/autocomplete/budgeted-content-assist.ts @@ -0,0 +1,329 @@ +// ============================================================================= +// Budgeted content-assist +// ============================================================================= +// Wraps Chevrotain's content-assist path exploration with a hard budget on the +// number of paths explored. 
Chevrotain's pruning only activates once some +// parse path consumes the input to its end; for malformed input where no +// complete parse exists (unbalanced parens, an unsupported clause, stray +// terminators) the DFS fans out — often exponentially in the size of the +// select list — and never terminates in practice. +// +// The implementation below is a verbatim port of Chevrotain 11's +// `nextPossibleTokensAfter` from `chevrotain/lib/src/parse/grammar/interpreter.js` +// with a single change: every loop iteration (one popped stack entry, whether a +// path or an EXIT_ALTERNATIVE marker) increments a counter, and we abort once it +// exceeds `maxPaths`. On abort we return whatever complete paths were found so +// far — typically none for pathological inputs, which callers read as "no suggestions". +// ============================================================================= + +import type { IToken, TokenType } from "chevrotain" +import { + Alternation, + Alternative, + NonTerminal, + Option, + Repetition, + RepetitionMandatory, + RepetitionMandatoryWithSeparator, + RepetitionWithSeparator, + Rule, + Terminal, +} from "chevrotain" +import { parser } from "../parser/parser" + +export interface ContentAssistSuggestion { + nextTokenType: TokenType + nextTokenOccurrence: number + ruleStack: string[] + occurrenceStack: number[] +} + +export interface BudgetedResult { + suggestions: ContentAssistSuggestion[] + /** True if the path budget was hit before exploration finished. */ + aborted: boolean +} + +// Budget chosen by measurement: valid queries with up to ~200 select items +// explore ~66k paths. Pathological inputs (trailing comma + function calls in +// select list) grow ~4x per item and never terminate. 500k gives ~7.5x headroom +// over the largest valid case while aborting pathological inputs in <200ms. +export const DEFAULT_MAX_PATHS = 500_000 + +// Chevrotain's path interpreter mixes grammar productions with string sentinels +// in the `def` arrays and stacks. 
We model both slots as `unknown[]` to mirror +// Chevrotain's own loose JS typing and cast through `ProductionDef` only where the +// productions' constructors demand it. +const EXIT_NON_TERMINAL = "EXIT_NONE_TERMINAL" +const EXIT_NON_TERMINAL_ARR: readonly unknown[] = [EXIT_NON_TERMINAL] +const EXIT_ALTERNATIVE = "EXIT_ALTERNATIVE" + +// Accessors for parser internals that Chevrotain's ContentAssist trait uses. +// These fields are not in the public .d.ts but are set by the framework on the +// parser instance and are stable across the Chevrotain 11.x line. +interface ParserInternals { + tokenMatcher: (token: IToken, tokenType: TokenType) => boolean + maxLookahead: number + gastProductionsCache: Record<string, Rule> +} + +const parserInternals = parser as unknown as ParserInternals + +interface Path { + idx: number + def: unknown[] + ruleStack: string[] + occurrenceStack: number[] +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type ProductionDef = any[] // concrete type hidden by Chevrotain's API + +function concatDef(...parts: unknown[][]): unknown[] { + const out: unknown[] = [] + for (const p of parts) out.push(...p) + return out +} + +function nextPossibleTokensAfterBudgeted( + initialDef: unknown[], + tokenVector: IToken[], + tokMatcher: (t: IToken, tt: TokenType) => boolean, + maxLookAhead: number, + maxPaths: number, +): BudgetedResult { + let foundCompletePath = false + const tokenVectorLength = tokenVector.length + const minimalAlternativesIndex = tokenVectorLength - maxLookAhead - 1 + const result: ContentAssistSuggestion[] = [] + const possiblePaths: unknown[] = [] + possiblePaths.push({ + idx: -1, + def: initialDef, + ruleStack: [], + occurrenceStack: [], + } satisfies Path) + + let pathsExplored = 0 + while (possiblePaths.length > 0) { + pathsExplored++ + if (pathsExplored > maxPaths) { + return { suggestions: result, aborted: true } + } + + const currPath = possiblePaths.pop() + if (currPath === EXIT_ALTERNATIVE) { + if (foundCompletePath) { + 
const top = possiblePaths[possiblePaths.length - 1] + if ( + typeof top === "object" && + top !== null && + typeof (top as Path).idx === "number" && + (top as Path).idx <= minimalAlternativesIndex + ) { + possiblePaths.pop() + } + } + continue + } + if (typeof currPath !== "object" || currPath === null) continue + + const path = currPath as Path + const currDef = path.def + const currIdx = path.idx + const currRuleStack = path.ruleStack + const currOccurrenceStack = path.occurrenceStack + if (currDef.length === 0) continue + + const prod = currDef[0] + if (prod === EXIT_NON_TERMINAL) { + possiblePaths.push({ + idx: currIdx, + def: currDef.slice(1), + ruleStack: currRuleStack.slice(0, -1), + occurrenceStack: currOccurrenceStack.slice(0, -1), + } satisfies Path) + } else if (prod instanceof Terminal) { + if (currIdx < tokenVectorLength - 1) { + const nextIdx = currIdx + 1 + const actualToken = tokenVector[nextIdx] + if (tokMatcher(actualToken, prod.terminalType)) { + possiblePaths.push({ + idx: nextIdx, + def: currDef.slice(1), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } + } else if (currIdx === tokenVectorLength - 1) { + result.push({ + nextTokenType: prod.terminalType, + nextTokenOccurrence: prod.idx, + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + }) + foundCompletePath = true + } + } else if (prod instanceof NonTerminal) { + const newRuleStack = currRuleStack.slice() + newRuleStack.push(prod.nonTerminalName) + const newOccurrenceStack = currOccurrenceStack.slice() + newOccurrenceStack.push(prod.idx) + possiblePaths.push({ + idx: currIdx, + def: concatDef( + prod.definition as ProductionDef, + EXIT_NON_TERMINAL_ARR as unknown[], + currDef.slice(1), + ), + ruleStack: newRuleStack, + occurrenceStack: newOccurrenceStack, + } satisfies Path) + } else if (prod instanceof Option) { + possiblePaths.push({ + idx: currIdx, + def: currDef.slice(1), + ruleStack: currRuleStack, + occurrenceStack: 
currOccurrenceStack, + } satisfies Path) + possiblePaths.push(EXIT_ALTERNATIVE) + possiblePaths.push({ + idx: currIdx, + def: concatDef(prod.definition as ProductionDef, currDef.slice(1)), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof RepetitionMandatory) { + const secondIteration = new Repetition({ + definition: prod.definition, + idx: prod.idx, + }) + possiblePaths.push({ + idx: currIdx, + def: concatDef( + prod.definition as ProductionDef, + [secondIteration], + currDef.slice(1), + ), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof RepetitionMandatoryWithSeparator) { + const separatorGast = new Terminal({ terminalType: prod.separator }) + const secondIteration = new Repetition({ + definition: [separatorGast, ...prod.definition], + idx: prod.idx, + }) + possiblePaths.push({ + idx: currIdx, + def: concatDef( + prod.definition as ProductionDef, + [secondIteration], + currDef.slice(1), + ), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof RepetitionWithSeparator) { + possiblePaths.push({ + idx: currIdx, + def: currDef.slice(1), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + possiblePaths.push(EXIT_ALTERNATIVE) + const separatorGast = new Terminal({ terminalType: prod.separator }) + const nthRepetition = new Repetition({ + definition: [separatorGast, ...prod.definition], + idx: prod.idx, + }) + possiblePaths.push({ + idx: currIdx, + def: concatDef( + prod.definition as ProductionDef, + [nthRepetition], + currDef.slice(1), + ), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof Repetition) { + possiblePaths.push({ + idx: currIdx, + def: currDef.slice(1), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + 
possiblePaths.push(EXIT_ALTERNATIVE) + const nthRepetition = new Repetition({ + definition: prod.definition, + idx: prod.idx, + }) + possiblePaths.push({ + idx: currIdx, + def: concatDef( + prod.definition as ProductionDef, + [nthRepetition], + currDef.slice(1), + ), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof Alternation) { + for (let i = prod.definition.length - 1; i >= 0; i--) { + const currAlt = prod.definition[i] as Alternative + possiblePaths.push({ + idx: currIdx, + def: concatDef(currAlt.definition as ProductionDef, currDef.slice(1)), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + possiblePaths.push(EXIT_ALTERNATIVE) + } + } else if (prod instanceof Alternative) { + possiblePaths.push({ + idx: currIdx, + def: concatDef(prod.definition as ProductionDef, currDef.slice(1)), + ruleStack: currRuleStack, + occurrenceStack: currOccurrenceStack, + } satisfies Path) + } else if (prod instanceof Rule) { + const newRuleStack = currRuleStack.slice() + newRuleStack.push(prod.name) + const newCurrOccurrenceStack = currOccurrenceStack.slice() + newCurrOccurrenceStack.push(1) + possiblePaths.push({ + idx: currIdx, + def: prod.definition as ProductionDef, + ruleStack: newRuleStack, + occurrenceStack: newCurrOccurrenceStack, + } satisfies Path) + } + } + + return { suggestions: result, aborted: false } +} + +/** + * Drop-in replacement for `parser.computeContentAssist` that aborts after a + * fixed number of explored paths. Returns the partial set of suggestions + * collected before abort (typically empty for inputs that trigger the + * exponential blow-up) along with an `aborted` flag. 
+ */ +export function computeContentAssistBudgeted( + ruleName: string, + tokens: IToken[], + maxPaths: number = DEFAULT_MAX_PATHS, +): BudgetedResult { + const gast = parserInternals.gastProductionsCache[ruleName] + if (!gast) { + throw new Error(`Rule ->${ruleName}<- does not exist in this grammar.`) + } + return nextPossibleTokensAfterBudgeted( + [gast], + tokens, + parserInternals.tokenMatcher, + parserInternals.maxLookahead, + maxPaths, + ) +} diff --git a/src/autocomplete/content-assist.ts b/src/autocomplete/content-assist.ts index 5e6dd28..fb31af7 100644 --- a/src/autocomplete/content-assist.ts +++ b/src/autocomplete/content-assist.ts @@ -1,5 +1,5 @@ import { type ILexingError, IToken, TokenType } from "chevrotain" -import { parser, parse as parseRaw } from "../parser/parser" +import { parse as parseRaw } from "../parser/parser" import { visitor } from "../parser/visitor" import { QuestDBLexer } from "../parser/lexer" import type { Statement } from "../parser/ast" @@ -7,6 +7,7 @@ import { IDENTIFIER_KEYWORD_TOKENS, EXPRESSION_OPERATORS, } from "./token-classification" +import { computeContentAssistBudgeted } from "./budgeted-content-assist" // ============================================================================= // Constants @@ -850,7 +851,9 @@ function computeSuggestions(tokens: IToken[]): ComputeResult { const ruleName = tokens.some((t) => t.tokenType.name === "Semicolon") ? "statements" : "statement" - const suggestions = parser.computeContentAssist(ruleName, tokens) + // Budgeted: aborts after a fixed number of path-exploration steps. + // The budget keeps autocomplete responsive and falls back to empty suggestions when hit. 
+ const { suggestions } = computeContentAssistBudgeted(ruleName, tokens) const result = suggestions.map((s) => s.nextTokenType) // Walk every IdentifierKeyword path and union the category flags valid at @@ -875,7 +878,10 @@ function computeSuggestions(tokens: IToken[]): ComputeResult { const collapsed = collapseTrailingQualifiedRef(tokens) if (collapsed) { try { - const extra = parser.computeContentAssist(ruleName, collapsed) + const extra = computeContentAssistBudgeted( + ruleName, + collapsed, + ).suggestions const seen = new Set(result.map((t) => t.name)) for (const s of extra) { if (!seen.has(s.nextTokenType.name)) { diff --git a/src/autocomplete/suggestion-builder.ts b/src/autocomplete/suggestion-builder.ts index 9fa6262..313db0c 100644 --- a/src/autocomplete/suggestion-builder.ts +++ b/src/autocomplete/suggestion-builder.ts @@ -165,15 +165,30 @@ export function buildSuggestions( // (e.g., "Left" → "LEFT JOIN") instead of suggesting bare "LEFT". if (isJoinContext && JOIN_COMPOUND_MAP.has(name)) { const compound = JOIN_COMPOUND_MAP.get(name)! - if (seenKeywords.has(compound)) continue - seenKeywords.add(compound) - suggestions.push({ - label: compound, - kind: SuggestionKind.Keyword, - insertText: compound, - filterText: compound.toLowerCase(), - priority: SuggestionPriority.Medium, - }) + if (!seenKeywords.has(compound)) { + seenKeywords.add(compound) + suggestions.push({ + label: compound, + kind: SuggestionKind.Keyword, + insertText: compound, + filterText: compound.toLowerCase(), + priority: SuggestionPriority.Medium, + }) + } + // `Window` is special: the token is also the start of the standalone + // named-window clause (SELECT ... FROM t WINDOW w AS (...)), which is + // a valid continuation after fromClause alongside WINDOW JOIN. Emit + // the bare "WINDOW" keyword so users can discover and type it. 
+ if (name === "Window" && !seenKeywords.has("WINDOW")) { + seenKeywords.add("WINDOW") + suggestions.push({ + label: "WINDOW", + kind: SuggestionKind.Keyword, + insertText: "WINDOW", + filterText: "window", + priority: SuggestionPriority.Medium, + }) + } continue } diff --git a/src/parser/ast.ts b/src/parser/ast.ts index 89bf27d..5c88719 100644 --- a/src/parser/ast.ts +++ b/src/parser/ast.ts @@ -72,11 +72,29 @@ export interface SelectStatement extends AstNode { latestOn?: LatestOnClause groupBy?: Expression[] pivot?: PivotClause + /** Named window definitions: SELECT ... WINDOW w AS (...) [, w2 AS (...)] */ + namedWindows?: NamedWindow[] orderBy?: OrderByItem[] limit?: LimitClause setOperations?: SetOperation[] } +/** + * A named window definition as used in the WINDOW clause: + * WINDOW w AS ([base_window] [PARTITION BY ...] [ORDER BY ...] [frame]) + * Referenced from window-function OVER clauses by name: `avg(x) OVER w`. + * Introduced in QuestDB as of Feb 2026. + */ +export interface NamedWindow extends AstNode { + type: "namedWindow" + name: string + /** Inherited base window (e.g. WINDOW w2 AS (w1 ORDER BY x)). */ + baseWindow?: string + partitionBy?: Expression[] + orderBy?: OrderByItem[] + frame?: WindowFrame +} + export interface CTE extends AstNode { type: "cte" name: string @@ -1022,6 +1040,12 @@ export interface FunctionCall extends AstNode { export interface WindowSpecification extends AstNode { type: "windowSpec" + /** + * Named window reference (e.g. `avg(x) OVER w`). When set, the other + * fields are not used — the function references a named window defined + * in the SELECT's `namedWindows` list. 
+ */ + windowName?: string partitionBy?: Expression[] orderBy?: OrderByItem[] frame?: WindowFrame diff --git a/src/parser/cst-types.d.ts b/src/parser/cst-types.d.ts index f4f78ed..b69863c 100644 --- a/src/parser/cst-types.d.ts +++ b/src/parser/cst-types.d.ts @@ -127,6 +127,7 @@ export type SimpleSelectCstChildren = { LParen?: IToken[]; pivotBody?: PivotBodyCstNode[]; RParen?: IToken[]; + windowClause?: WindowClauseCstNode[]; orderByClause?: OrderByClauseCstNode[]; limitClause?: LimitClauseCstNode[]; }; @@ -2369,6 +2370,42 @@ export type OverClauseCstChildren = { identifier?: IdentifierCstNode[]; }; +export interface WindowClauseCstNode extends CstNode { + name: "windowClause"; + children: WindowClauseCstChildren; +} + +export type WindowClauseCstChildren = { + Window: IToken[]; + namedWindow: (NamedWindowCstNode)[]; + Comma?: IToken[]; +}; + +export interface NamedWindowCstNode extends CstNode { + name: "namedWindow"; + children: NamedWindowCstChildren; +} + +export type NamedWindowCstChildren = { + identifier: IdentifierCstNode[]; + As: IToken[]; + LParen: IToken[]; + windowSpec: WindowSpecCstNode[]; + RParen: IToken[]; +}; + +export interface WindowSpecCstNode extends CstNode { + name: "windowSpec"; + children: WindowSpecCstChildren; +} + +export type WindowSpecCstChildren = { + identifier?: IdentifierCstNode[]; + windowPartitionByClause?: WindowPartitionByClauseCstNode[]; + orderByClause?: OrderByClauseCstNode[]; + windowFrameClause?: WindowFrameClauseCstNode[]; +}; + export interface WindowPartitionByClauseCstNode extends CstNode { name: "windowPartitionByClause"; children: WindowPartitionByClauseCstChildren; @@ -2691,6 +2728,9 @@ export interface ICstNodeVisitor extends ICstVisitor { functionCall(children: FunctionCallCstChildren, param?: IN): OUT; identifierExpression(children: IdentifierExpressionCstChildren, param?: IN): OUT; overClause(children: OverClauseCstChildren, param?: IN): OUT; + windowClause(children: WindowClauseCstChildren, param?: IN): OUT; + 
namedWindow(children: NamedWindowCstChildren, param?: IN): OUT; + windowSpec(children: WindowSpecCstChildren, param?: IN): OUT; windowPartitionByClause(children: WindowPartitionByClauseCstChildren, param?: IN): OUT; windowFrameClause(children: WindowFrameClauseCstChildren, param?: IN): OUT; windowFrameBound(children: WindowFrameBoundCstChildren, param?: IN): OUT; diff --git a/src/parser/parser.ts b/src/parser/parser.ts index b433025..3593f15 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -609,6 +609,15 @@ class QuestDBParser extends CstParser { this.SUBRULE(this.pivotBody) this.CONSUME(RParen) }) + // Named window clause: WINDOW w AS (...) [, w2 AS (...)] + // Standard SQL places WINDOW between HAVING/GROUP BY and ORDER BY. + // GATE: disambiguate from WINDOW JOIN (which lives inside fromClause, + // and appears as the first token of a join, not here after GROUP BY). + this.option(10, { + GATE: () => + this.LA(1).tokenType === Window && this.LA(2).tokenType !== Join, + DEF: () => this.SUBRULE(this.windowClause), + }) this.OPTION6(() => this.SUBRULE(this.orderByClause)) this.OPTION7(() => this.SUBRULE(this.limitClause)) }) @@ -639,6 +648,10 @@ class QuestDBParser extends CstParser { this.CONSUME(Comma) this.SUBRULE1(this.selectItem) }) + // QuestDB accepts a trailing comma in the select list — matches + // the Java parser's behaviour. Default 2-token lookahead keeps + // MANY from entering on a trailing "," followed by a keyword. 
+ this.OPTION(() => this.CONSUME2(Comma)) }, }, { @@ -648,6 +661,7 @@ class QuestDBParser extends CstParser { this.CONSUME1(Comma) this.SUBRULE2(this.selectItem) }) + this.OPTION1(() => this.CONSUME3(Comma)) }, }, ]) @@ -694,17 +708,30 @@ class QuestDBParser extends CstParser { private fromClause = this.RULE("fromClause", () => { this.SUBRULE(this.fromSource) - this.MANY(() => { - this.OR([ - { ALT: () => this.SUBRULE(this.joinClause) }, - { - ALT: () => { - this.CONSUME(Comma) - this.OPTION(() => this.CONSUME(Lateral)) - this.SUBRULE1(this.fromSource) + this.MANY({ + // WINDOW/PREVAILING can be either the start of a WINDOW/PREVAILING JOIN + // or the start of the top-level WINDOW clause that follows fromClause + // (SELECT ... FROM t WINDOW w AS (...)). Only iterate when the next + // two tokens form a JOIN so the top-level WINDOW clause keeps its tokens. + GATE: () => { + const la1 = this.LA(1).tokenType + if (la1 === Window || la1 === Prevailing) { + return this.LA(2).tokenType === Join + } + return true + }, + DEF: () => { + this.OR([ + { ALT: () => this.SUBRULE(this.joinClause) }, + { + ALT: () => { + this.CONSUME(Comma) + this.OPTION(() => this.CONSUME(Lateral)) + this.SUBRULE1(this.fromSource) + }, }, - }, - ]) + ]) + }, }) }) @@ -4078,6 +4105,61 @@ class QuestDBParser extends CstParser { ]) }) + // ========================================================================== + // Named window clause (SELECT ... WINDOW w AS (...)). + // Introduced in QuestDB PR #6746 (Feb 2026). + // Syntax: WINDOW name AS ( [base_name] [PARTITION BY ...] [ORDER BY ...] 
+ // [frame] [exclude] ) [, name AS (...)] + // ========================================================================== + private windowClause = this.RULE("windowClause", () => { + this.CONSUME(Window) + this.SUBRULE(this.namedWindow) + this.MANY(() => { + this.CONSUME(Comma) + this.SUBRULE1(this.namedWindow) + }) + }) + + private namedWindow = this.RULE("namedWindow", () => { + this.SUBRULE(this.identifier) + this.CONSUME(As) + this.CONSUME(LParen) + this.SUBRULE(this.windowSpec) + this.CONSUME(RParen) + }) + + private windowSpec = this.RULE("windowSpec", () => { + // Optional base window name (window inheritance). Matches the Java + // parser's logic: a bare non-keyword identifier followed by ')' or a + // window-spec keyword signals inheritance (e.g. WINDOW w2 AS (w1) or + // (w1 ORDER BY x)). If the first token is already a spec keyword, + // there is no base window. + this.OPTION({ + GATE: () => { + const la1Tok = this.LA(1) + const isIdentifierLike = + la1Tok.tokenType === Identifier || + la1Tok.tokenType === QuotedIdentifier || + tokenMatcher(la1Tok, IdentifierKeyword) + if (!isIdentifierLike) return false + const la2 = this.LA(2).tokenType + return ( + la2 === RParen || + la2 === Partition || + la2 === Order || + la2 === Rows || + la2 === Range || + la2 === Cumulative || + la2 === Groups + ) + }, + DEF: () => this.SUBRULE(this.identifier), + }) + this.OPTION1(() => this.SUBRULE(this.windowPartitionByClause)) + this.OPTION2(() => this.SUBRULE(this.orderByClause)) + this.OPTION3(() => this.SUBRULE(this.windowFrameClause)) + }) + private windowPartitionByClause = this.RULE("windowPartitionByClause", () => { this.CONSUME(Partition) this.CONSUME(By) diff --git a/src/parser/toSql.ts b/src/parser/toSql.ts index 86851ae..7930cfe 100644 --- a/src/parser/toSql.ts +++ b/src/parser/toSql.ts @@ -243,6 +243,12 @@ function selectToSql(stmt: AST.SelectStatement): string { parts.push(pivotClauseToSql(stmt.pivot)) } + // WINDOW w AS (...) 
[, w2 AS (...)] + if (stmt.namedWindows && stmt.namedWindows.length > 0) { + parts.push("WINDOW") + parts.push(stmt.namedWindows.map(namedWindowToSql).join(", ")) + } + // ORDER BY if (stmt.orderBy && stmt.orderBy.length > 0) { parts.push("ORDER BY") @@ -1690,7 +1696,12 @@ function functionToSql(fn: AST.FunctionCall): string { } if (fn.over) { - sql += ` OVER (${windowSpecToSql(fn.over)})` + // Named window reference: `OVER w` (no parens). + if (fn.over.windowName) { + sql += ` OVER ${fn.over.windowName}` + } else { + sql += ` OVER (${windowSpecToSql(fn.over)})` + } } return sql @@ -1716,6 +1727,21 @@ function windowSpecToSql(spec: AST.WindowSpecification): string { return parts.join(" ") } +function namedWindowToSql(w: AST.NamedWindow): string { + const inner: string[] = [] + if (w.baseWindow) inner.push(w.baseWindow) + if (w.partitionBy && w.partitionBy.length > 0) { + inner.push(`PARTITION BY ${w.partitionBy.map(expressionToSql).join(", ")}`) + } + if (w.orderBy && w.orderBy.length > 0) { + inner.push(`ORDER BY ${w.orderBy.map(orderByItemToSql).join(", ")}`) + } + if (w.frame) { + inner.push(windowFrameToSql(w.frame)) + } + return `${w.name} AS (${inner.join(" ")})` +} + function windowFrameToSql(frame: AST.WindowFrame): string { const mode = frame.mode.toUpperCase() let sql: string diff --git a/src/parser/visitor.ts b/src/parser/visitor.ts index 0ea347c..72728bd 100644 --- a/src/parser/visitor.ts +++ b/src/parser/visitor.ts @@ -171,11 +171,14 @@ import type { ValuesClauseCstChildren, ValuesListCstChildren, WhereClauseCstChildren, + WindowClauseCstChildren, WindowFrameBoundCstChildren, WindowFrameClauseCstChildren, WindowJoinBoundCstChildren, WindowJoinCstChildren, WindowPartitionByClauseCstChildren, + WindowSpecCstChildren, + NamedWindowCstChildren, WithClauseCstChildren, WithStatementCstChildren, } from "./cst-types" @@ -446,6 +449,10 @@ class QuestDBVisitor extends BaseVisitor { } } + if (ctx.windowClause) { + result.namedWindows = 
this.visit(ctx.windowClause) as AST.NamedWindow[] + } + if (ctx.orderByClause) { result.orderBy = this.visit(ctx.orderByClause) as AST.OrderByItem[] } @@ -3754,6 +3761,14 @@ class QuestDBVisitor extends BaseVisitor { } overClause(ctx: OverClauseCstChildren): AST.WindowSpecification { + // Named window reference: `OVER w` — the grammar's other branch. + if (ctx.identifier && ctx.identifier.length > 0) { + return { + type: "windowSpec", + windowName: this.extractIdentifierName(ctx.identifier[0].children), + } + } + const result: AST.WindowSpecification = { type: "windowSpec", } @@ -3775,6 +3790,49 @@ class QuestDBVisitor extends BaseVisitor { return result } + windowClause(ctx: WindowClauseCstChildren): AST.NamedWindow[] { + return ctx.namedWindow.map((n) => this.visit(n) as AST.NamedWindow) + } + + namedWindow(ctx: NamedWindowCstChildren): AST.NamedWindow { + const spec = this.visit(ctx.windowSpec) as Omit< + AST.NamedWindow, + "type" | "name" + > + return { + type: "namedWindow", + name: this.extractIdentifierName(ctx.identifier[0].children), + ...spec, + } + } + + windowSpec( + ctx: WindowSpecCstChildren, + ): Omit<AST.NamedWindow, "type" | "name"> { + const result: Omit<AST.NamedWindow, "type" | "name"> = {} + + // Optional base window name (inheritance) + if (ctx.identifier && ctx.identifier.length > 0) { + result.baseWindow = this.extractIdentifierName(ctx.identifier[0].children) + } + + if (ctx.windowPartitionByClause) { + result.partitionBy = this.visit( + ctx.windowPartitionByClause, + ) as AST.Expression[] + } + + if (ctx.orderByClause) { + result.orderBy = this.visit(ctx.orderByClause) as AST.OrderByItem[] + } + + if (ctx.windowFrameClause) { + result.frame = this.visit(ctx.windowFrameClause) as AST.WindowFrame + } + + return result + } + windowPartitionByClause( ctx: WindowPartitionByClauseCstChildren, ): AST.Expression[] { diff --git a/tests/autocomplete-perf.test.ts b/tests/autocomplete-perf.test.ts new file mode 100644 index 0000000..0418a7b --- /dev/null +++ b/tests/autocomplete-perf.test.ts @@ -0,0 +1,97 @@ +import { 
describe, it, expect } from "vitest" +import { createAutocompleteProvider } from "../src/autocomplete" + +// These tests exercise the path-count budget in budgeted-content-assist.ts. +// Each input is a case where no complete parse path exists, so Chevrotain's +// EXIT_ALTERNATIVE pruning never activates and the DFS fans out exponentially +// in the size of the select list. Without the budget, each of these would +// hang the UI; with it, they abort in well under a second and return empty +// suggestions (the correct answer when there's no syntactic continuation). + +describe("autocomplete — path-count budget regression tests", () => { + const schema = { + tables: [{ name: "trades" }, { name: "orders" }], + columns: { + trades: [ + { name: "symbol", type: "STRING" }, + { name: "price", type: "DOUBLE" }, + { name: "timestamp", type: "TIMESTAMP" }, + ], + orders: [ + { name: "id", type: "LONG" }, + { name: "user_id", type: "LONG" }, + ], + }, + } + const provider = createAutocompleteProvider(schema) + + // Each pathological SQL body is parameterised by how many aggregate items + // precede the malformed tail. Growth is ~2-4x per item; n=12-15 is already + // multi-second without the budget. + const aggItems = (n: number): string => + Array(n) + .fill(0) + .map((_, i) => `avg(price) a${i}`) + .join(", ") + + it("survives unbalanced extra closing parens on a big select list", () => { + // Direct analogue of the original bug report's tail shape (a subquery + // that closes with more ")" than it opens). Raw Chevrotain at n=15 runs + // ~7.7s. + const sql = `SELECT ${aggItems(15)} FROM trades)))` + + const start = performance.now() + const suggestions = provider.getSuggestions(sql, sql.length) + const elapsed = performance.now() - start + + expect(elapsed).toBeLessThan(1000) + expect(Array.isArray(suggestions)).toBe(true) + }) + + it("survives an unsupported JOIN ... 
USING (col) clause", () => { + // USING is standard SQL but not in QuestDB's grammar — join conditions + // go through ON only. Raw Chevrotain on n=12 runs ~1.9s. + const sql = `SELECT ${aggItems(12)} FROM trades a JOIN orders b USING (id)` + + const start = performance.now() + const suggestions = provider.getSuggestions(sql, sql.length) + const elapsed = performance.now() - start + + expect(elapsed).toBeLessThan(1000) + expect(Array.isArray(suggestions)).toBe(true) + }) + + it("survives stray trailing semicolons inside a statement", () => { + // `SELECT ... FROM t;;;;` — the first `;` ends the statement, but the + // extra ones have no grammar home and keep the outer `statements` MANY + // from closing cleanly. Exercises the budget at relatively small n. + const sql = `SELECT ${aggItems(10)} FROM trades;;;;` + + const start = performance.now() + const suggestions = provider.getSuggestions(sql, sql.length) + const elapsed = performance.now() - start + + expect(elapsed).toBeLessThan(1000) + expect(Array.isArray(suggestions)).toBe(true) + }) + + it("stays fast on a large well-formed query (budget headroom)", () => { + // A syntactically valid query with 50 aggregate select items, a WHERE + // clause, SAMPLE BY, and ORDER BY. Pruning is active because the parse + // completes, so the DFS stays linear and the 500k budget should never + // be approached. Guards against shrinking the budget or adding grammar + // forks that would push valid queries past it. 
+ const items = Array(50) + .fill(0) + .map((_, i) => `count(CASE WHEN symbol = 'S${i}' THEN 1 END) as col${i}`) + .join(", ") + const sql = `SELECT ${items} FROM trades WHERE symbol = 'X' SAMPLE BY 1h ORDER BY timestamp` + + const start = performance.now() + const suggestions = provider.getSuggestions(sql, sql.length) + const elapsed = performance.now() - start + + expect(elapsed).toBeLessThan(200) + expect(suggestions.length).toBeGreaterThan(0) + }) +}) diff --git a/tests/autocomplete.test.ts b/tests/autocomplete.test.ts index f070c05..adb496e 100644 --- a/tests/autocomplete.test.ts +++ b/tests/autocomplete.test.ts @@ -4243,4 +4243,326 @@ describe("Position-typed suggestions — by statement type", () => { }) }) }) + + // =========================================================================== + // Comprehensive walkthrough — RSI finance cookbook query (QuestDB docs). + // A 4-CTE pipeline that exercises nearly every autocomplete code path: + // CTE column scoping across a chain, SAMPLE BY, inline OVER window, + // WINDOW clause with named window, CASE expressions, line comments, and + // the final outer SELECT that only sees the last CTE's columns. + // =========================================================================== + describe("RSI query — comprehensive cursor walkthrough", () => { + const rsiSchema = { + tables: [{ name: "fx_trades_ohlc_1m", designatedTimestamp: "timestamp" }], + columns: { + fx_trades_ohlc_1m: [ + { name: "timestamp", type: "TIMESTAMP" }, + { name: "symbol", type: "STRING" }, + { name: "open", type: "DOUBLE" }, + { name: "high", type: "DOUBLE" }, + { name: "low", type: "DOUBLE" }, + { name: "close", type: "DOUBLE" }, + { name: "total_volume", type: "DOUBLE" }, + ], + }, + } + const rsiProvider = createAutocompleteProvider(rsiSchema) + + // Exact byte-for-byte copy of the user-reported RSI query from the + // QuestDB finance cookbook. 
Preserves: + // - trailing space after `OVER w` inside the two line comments + // - trailing semicolon after `ORDER BY timestamp` + // Do not reformat — prettier's trailing-whitespace stripping would + // silently change the fixture away from the real input. + const rsi = + "WITH ohlc AS (\n" + + " SELECT\n" + + " timestamp,\n" + + " first(open) AS open,\n" + + " max(high) AS high,\n" + + " min(low) AS low,\n" + + " last(close) AS close,\n" + + " sum(total_volume) AS total_volume\n" + + " FROM fx_trades_ohlc_1m\n" + + " WHERE symbol = 'EURUSD'\n" + + " AND timestamp > dateadd('d', -2, now())\n" + + " SAMPLE BY 15m\n" + + "), changes AS (\n" + + " SELECT\n" + + " timestamp,\n" + + " close,\n" + + " close - lag(close) OVER (ORDER BY timestamp) AS change\n" + + " FROM ohlc\n" + + "), gains_losses AS (\n" + + " SELECT\n" + + " timestamp,\n" + + " close,\n" + + " CASE WHEN change > 0 THEN change ELSE 0 END AS gain,\n" + + " CASE WHEN change < 0 THEN -change ELSE 0 END AS loss\n" + + " FROM changes\n" + + "), smoothed AS (\n" + + " SELECT\n" + + " timestamp,\n" + + " close,\n" + + " 100 --avg(gain, 'period', 14) OVER w \n" + + " AS avg_gain,\n" + + " 100 -- avg(loss, 'period', 14) OVER w \n" + + " AS avg_loss\n" + + " FROM gains_losses\n" + + " WINDOW w AS (ORDER BY timestamp)\n" + + ")\n" + + "SELECT\n" + + " timestamp,\n" + + " close,\n" + + " CASE\n" + + " WHEN avg_loss = 0 THEN 100\n" + + " ELSE 100 - (100 / (1 + avg_gain / avg_loss))\n" + + " END AS rsi,\n" + + " 70.0 AS overbought,\n" + + " 30.0 AS oversold\n" + + "FROM smoothed\n" + + "ORDER BY timestamp;" + + /** + * Position the cursor immediately after the first occurrence of `anchor` + * in the query. Fails the test if the anchor is not found so a drifting + * fixture doesn't silently pass. 
+ */ + const cursorAfter = (anchor: string): number => { + const idx = rsi.indexOf(anchor) + if (idx < 0) throw new Error(`anchor not found in fixture: ${anchor}`) + return idx + anchor.length + } + + const getLabelsAt = (cursor: number): string[] => + rsiProvider.getSuggestions(rsi, cursor).map((s) => s.label) + + const getKindAt = (cursor: number, kind: SuggestionKind): string[] => + rsiProvider + .getSuggestions(rsi, cursor) + .filter((s) => s.kind === kind) + .map((s) => s.label) + + it("outer SELECT sees `smoothed`'s projected columns (last CTE)", () => { + // Inside the outer SELECT after `close,` the cursor should see the + // columns that the `smoothed` CTE exposes — not fx_trades_ohlc_1m's. + const cursor = cursorAfter("\n close,\n CASE") + // Cursor right before `CASE`: expression position, CTE columns available. + const labels = getLabelsAt(cursor - "CASE".length) + expect(labels).toContain("timestamp") + expect(labels).toContain("close") + expect(labels).toContain("avg_gain") + expect(labels).toContain("avg_loss") + }) + + it("outer FROM suggests every CTE name", () => { + const cursor = cursorAfter( + "END AS rsi,\n 70.0 AS overbought,\n 30.0 AS oversold\nFROM ", + ) + const tables = getKindAt(cursor, SuggestionKind.Table) + // All four CTEs should be offered as table sources. 
+ expect(tables).toEqual( + expect.arrayContaining(["ohlc", "changes", "gains_losses", "smoothed"]), + ) + }) + + it("outer ORDER BY offers the selected CTE's columns", () => { + const cursor = cursorAfter("FROM smoothed\nORDER BY ") + const labels = getLabelsAt(cursor) + expect(labels).toContain("timestamp") + expect(labels).toContain("close") + }) + + it("inside ohlc CTE after FROM suggests the base table", () => { + const cursor = cursorAfter("sum(total_volume) AS total_volume\n FROM ") + const tables = getKindAt(cursor, SuggestionKind.Table) + expect(tables).toContain("fx_trades_ohlc_1m") + }) + + it("inside ohlc CTE WHERE clause suggests its base-table columns", () => { + const cursor = cursorAfter("WHERE ") + const labels = getLabelsAt(cursor) + expect(labels).toContain("symbol") + expect(labels).toContain("timestamp") + expect(labels).toContain("open") + expect(labels).toContain("close") + }) + + it("inside ohlc CTE after `AND ` suggests columns", () => { + const cursor = cursorAfter(" AND ") + const labels = getLabelsAt(cursor) + expect(labels).toContain("timestamp") + expect(labels).toContain("symbol") + }) + + it("inside changes CTE FROM suggests the preceding CTE `ohlc`", () => { + const cursor = cursorAfter( + "close - lag(close) OVER (ORDER BY timestamp) AS change\n FROM ", + ) + const tables = getKindAt(cursor, SuggestionKind.Table) + // `ohlc` is available as a table source inside `changes`. + expect(tables).toContain("ohlc") + }) + + it("inside gains_losses FROM suggests the preceding CTE `changes`", () => { + const cursor = cursorAfter( + "CASE WHEN change < 0 THEN -change ELSE 0 END AS loss\n FROM ", + ) + const tables = getKindAt(cursor, SuggestionKind.Table) + expect(tables).toContain("changes") + }) + + it("inside gains_losses CASE expressions see `changes`'s columns", () => { + // CASE WHEN change > 0 THEN change ELSE 0 END — position right after WHEN. 
+ const cursor = cursorAfter("CASE WHEN ") + const labels = getLabelsAt(cursor) + // `change` is the column alias produced by `changes`'s select list. + expect(labels).toContain("change") + expect(labels).toContain("timestamp") + expect(labels).toContain("close") + }) + + it("inside smoothed FROM suggests `gains_losses`", () => { + // Note the trailing space after `OVER w ` inside the comment — + // preserved to match the real-world query byte-for-byte. + const cursor = cursorAfter( + "100 -- avg(loss, 'period', 14) OVER w \n AS avg_loss\n FROM ", + ) + const tables = getKindAt(cursor, SuggestionKind.Table) + expect(tables).toContain("gains_losses") + }) + + it("WINDOW clause spec opens for PARTITION/ORDER keywords", () => { + // Cursor immediately after `WINDOW w AS (` — the start of windowSpec. + const cursor = cursorAfter("WINDOW w AS (") + const labels = getLabelsAt(cursor) + // windowSpec can start with PARTITION, ORDER, ROWS, RANGE, CUMULATIVE, + // or a base window name. At minimum, ORDER and PARTITION keywords must + // surface. + expect(labels).toContain("ORDER") + expect(labels).toContain("PARTITION") + }) + + it("ORDER BY inside WINDOW clause suggests gains_losses columns", () => { + // Cursor after the final `ORDER BY ` inside WINDOW w AS (ORDER BY ... + const cursor = cursorAfter("WINDOW w AS (ORDER BY ") + const labels = getLabelsAt(cursor) + // `gains_losses` is the scope inside this CTE; its columns must appear. + expect(labels).toContain("timestamp") + expect(labels).toContain("close") + expect(labels).toContain("gain") + expect(labels).toContain("loss") + }) + + it("CASE expression in outer SELECT offers CTE columns inside THEN/ELSE", () => { + // Cursor inside the outer SELECT's CASE expression — right after THEN. + const cursor = cursorAfter(" WHEN avg_loss = 0 THEN ") + const labels = getLabelsAt(cursor) + // `smoothed` columns are visible in expression position.
+ expect(labels).toContain("avg_gain") + expect(labels).toContain("avg_loss") + expect(labels).toContain("close") + }) + + it("query completes autocomplete at cursor end in under 500ms", () => { + // End-to-end sanity: the entire RSI pipeline must be responsive + // (pruning active, budget not hit) with the cursor after the final `;`. + const start = performance.now() + rsiProvider.getSuggestions(rsi, rsi.length) + const elapsed = performance.now() - start + expect(elapsed).toBeLessThan(500) + }) + + it("every single character position in the full query returns quickly and doesn't throw", () => { + // Exhaustive resilience: park the cursor at every offset from 0 to the + // end of the query. At every position, autocomplete must return an + // array in bounded time — no crashes, no hangs. This is what a user + // clicking/arrow-keying anywhere in Monaco will exercise. + const slow: { cursor: number; ms: number }[] = [] + for (let cursor = 0; cursor <= rsi.length; cursor++) { + const start = performance.now() + let suggestions: Suggestion[] + try { + suggestions = rsiProvider.getSuggestions(rsi, cursor) + } catch (e) { + ;(e as Error).message = `[cursor=${cursor}] ${(e as Error).message}` + throw e + } + const elapsed = performance.now() - start + expect(Array.isArray(suggestions), `cursor=${cursor}`).toBe(true) + if (elapsed > 100) slow.push({ cursor, ms: elapsed }) + } + expect( + slow, + `positions slower than 100ms: ${JSON.stringify(slow)}`, + ).toEqual([]) + }) + + it("simulates the user typing the query from scratch — every prefix length", () => { + // For each progressive prefix (what the user has typed so far), with + // the cursor at the end of the prefix, autocomplete must return + // quickly and cleanly. This exercises a different mix of parser + // states than moving the cursor through the full text.
+ const slow: { len: number; ms: number }[] = [] + for (let len = 0; len <= rsi.length; len++) { + const prefix = rsi.slice(0, len) + const start = performance.now() + let suggestions: Suggestion[] + try { + suggestions = rsiProvider.getSuggestions(prefix, len) + } catch (e) { + ;(e as Error).message = + `[prefix length=${len}] ${(e as Error).message}` + throw e + } + const elapsed = performance.now() - start + expect(Array.isArray(suggestions), `prefix length=${len}`).toBe(true) + if (elapsed > 100) slow.push({ len, ms: elapsed }) + } + expect( + slow, + `prefix lengths slower than 100ms: ${JSON.stringify(slow)}`, + ).toEqual([]) + }) + + it("meaningful word boundaries in the query produce non-empty suggestions most of the time", () => { + // 'Word boundary' = position right after whitespace or punctuation — + // where a user would realistically ask for autocomplete. We don't + // require every boundary to be non-empty (inside line comments and + // at a few structurally ambiguous positions the parser may have no + // valid continuation), but the great majority must be useful. + const boundaries: number[] = [] + for (let i = 1; i <= rsi.length; i++) { + if (/[\s,()]/.test(rsi[i - 1])) boundaries.push(i) + } + let nonEmpty = 0 + for (const cursor of boundaries) { + if (rsiProvider.getSuggestions(rsi, cursor).length > 0) nonEmpty++ + } + // Over ~300 boundaries in this ~1000-char query; >85% should offer + // something useful. Catches regressions where a grammar change breaks + // suggestions at entire classes of positions. + const rate = nonEmpty / boundaries.length + expect(rate).toBeGreaterThan(0.85) + }) + + it("outer SELECT after `70.0 AS overbought,` offers CTE columns (mid-select-list)", () => { + // Cursor between two aliased literals in the outer SELECT. Must still + // recognise the select-list context and offer the CTE's columns. 
+ const cursor = cursorAfter(" 70.0 AS overbought,\n ") + const labels = getLabelsAt(cursor) + expect(labels).toContain("timestamp") + expect(labels).toContain("close") + expect(labels).toContain("avg_gain") + }) + + it("WINDOW w keyword is suggested as a valid clause after FROM in a CTE body", () => { + // After the CTE's FROM clause, WINDOW is one of the valid continuations + // (between WHERE/GROUP BY and ORDER BY). Checks that the grammar change + // that added the WINDOW clause is visible to content-assist. + const cursor = cursorAfter(" FROM gains_losses\n ") + const labels = getLabelsAt(cursor) + expect(labels).toContain("WINDOW") + }) + }) }) diff --git a/tests/fixtures/docs-queries.json b/tests/fixtures/docs-queries.json index f12d6fa..34e60cf 100644 --- a/tests/fixtures/docs-queries.json +++ b/tests/fixtures/docs-queries.json @@ -5278,5 +5278,29 @@ }, { "query": "SELECT\n t.symbol,\n t.ecn,\n t.passive,\n h.offset / 1000000000 AS horizon_sec,\n count() AS n,\n avg(((m.best_bid + m.best_ask) / 2 - t.price)\n / t.price * 10000) AS avg_markout_bps,\n avg((m.best_ask - m.best_bid)\n / ((m.best_bid + m.best_ask) / 2) * 10000) / 2 AS avg_half_spread_bps\nFROM fx_trades t\nHORIZON JOIN market_data m ON (symbol)\n RANGE FROM 0s TO 5m STEP 1s AS h\nWHERE t.side = 'buy'\n AND t.timestamp IN '$yesterday'\nGROUP BY t.symbol, t.ecn, t.passive, horizon_sec\nORDER BY t.symbol, t.ecn, t.passive, horizon_sec" + }, + { + "query": "SELECT\n timestamp,\n symbol,\n price,\n avg(price) OVER w AS avg_price,\n min(price) OVER w AS min_price,\n max(price) OVER w AS max_price\nFROM trades\nWHERE symbol = 'BTC-USDT'\nWINDOW w AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)\nLIMIT 100" + }, + { + "query": "SELECT\n timestamp,\n symbol,\n price,\n avg(price) OVER short_window AS avg_10,\n avg(price) OVER long_window AS avg_50\nFROM trades\nWHERE symbol = 'BTC-USDT'\nWINDOW\n short_window AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW),\n long_window 
AS (ORDER BY timestamp ROWS BETWEEN 49 PRECEDING AND CURRENT ROW)\nLIMIT 100" + }, + { + "query": "SELECT\n timestamp,\n symbol,\n price,\n avg(price) OVER w AS moving_avg,\n row_number() OVER (PARTITION BY symbol ORDER BY timestamp) AS seq\nFROM trades\nWHERE symbol = 'BTC-USDT'\nWINDOW w AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)\nLIMIT 100" + }, + { + "query": "WITH price_stats AS (\n SELECT\n timestamp,\n symbol,\n price,\n avg(price) OVER w AS moving_avg,\n price - avg(price) OVER w AS deviation\n FROM trades\n WHERE symbol = 'BTC-USDT'\n WINDOW w AS (ORDER BY timestamp ROWS BETWEEN 19 PRECEDING AND CURRENT ROW)\n)\nSELECT * FROM price_stats\nWHERE deviation > 10\nLIMIT 100" + }, + { + "query": "SELECT\n timestamp,\n symbol,\n price,\n avg(price) OVER w1 AS symbol_avg,\n avg(price) OVER w2 AS moving_avg\nFROM trades\nWHERE symbol = 'BTC-USDT'\nWINDOW\n w1 AS (ORDER BY timestamp),\n w2 AS (w1 ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)\nLIMIT 100" + }, + { + "query": "SELECT x, y, row_number() OVER w AS row_num FROM t WINDOW w AS ()" + }, + { + "query": "SELECT avg(x) OVER w FROM t WINDOW w AS (ORDER BY ts CUMULATIVE)" + }, + { + "query": "WITH ohlc AS (\n SELECT\n timestamp,\n first(open) AS open,\n max(high) AS high,\n min(low) AS low,\n last(close) AS close,\n sum(total_volume) AS total_volume\n FROM fx_trades_ohlc_1m\n WHERE symbol = 'EURUSD'\n AND timestamp > dateadd('d', -2, now())\n SAMPLE BY 15m\n), changes AS (\n SELECT\n timestamp,\n close,\n close - lag(close) OVER (ORDER BY timestamp) AS change\n FROM ohlc\n), gains_losses AS (\n SELECT\n timestamp,\n close,\n CASE WHEN change > 0 THEN change ELSE 0 END AS gain,\n CASE WHEN change < 0 THEN -change ELSE 0 END AS loss\n FROM changes\n), smoothed AS (\n SELECT\n timestamp,\n close,\n 100 --avg(gain, 'period', 14) OVER w \n AS avg_gain,\n 100 -- avg(loss, 'period', 14) OVER w \n AS avg_loss\n FROM gains_losses\n WINDOW w AS (ORDER BY timestamp)\n)\nSELECT\n timestamp,\n close,\n 
CASE\n WHEN avg_loss = 0 THEN 100\n ELSE 100 - (100 / (1 + avg_gain / avg_loss))\n END AS rsi,\n 70.0 AS overbought,\n 30.0 AS oversold\nFROM smoothed\nORDER BY timestamp;" } ] diff --git a/tests/parser.test.ts b/tests/parser.test.ts index 4258198..194e5c9 100644 --- a/tests/parser.test.ts +++ b/tests/parser.test.ts @@ -12,6 +12,24 @@ describe("QuestDB Parser", () => { expect(result.ast[0].type).toBe("select") }) + it("should accept a trailing comma in the select list", () => { + const result = parseToAst( + "SELECT avg(price) avg1, avg(price) avg2, FROM trades", + ) + expect(result.errors).toHaveLength(0) + expect(result.ast).toHaveLength(1) + expect(result.ast[0].type).toBe("select") + if (result.ast[0].type === "select") { + expect(result.ast[0].columns).toHaveLength(2) + } + }) + + it("should accept a trailing comma after `SELECT *, ...`", () => { + const result = parseToAst("SELECT *, rank() OVER () r, FROM trades") + expect(result.errors).toHaveLength(0) + expect(result.ast).toHaveLength(1) + }) + it("should parse SELECT with WHERE clause", () => { const result = parseToAst( "SELECT symbol, price FROM trades WHERE price > 100", @@ -6915,4 +6933,162 @@ orders PIVOT (sum(amount) FOR status IN ('open'))` expect(toSql(result.ast[0])).toBe(sql) }) }) + + describe("WINDOW clause (named windows)", () => { + it("parses the basic named-window example from the docs", () => { + const sql = + "SELECT timestamp, symbol, price, avg(price) OVER w AS avg_price, min(price) OVER w AS min_price, max(price) OVER w AS max_price FROM trades WHERE timestamp IN '[$today]' AND symbol = 'BTC-USDT' WINDOW w AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW) LIMIT 100" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + expect(result.ast).toHaveLength(1) + const stmt = result.ast[0] + expect(stmt.type).toBe("select") + if (stmt.type === "select") { + expect(stmt.namedWindows).toHaveLength(1) + const w = stmt.namedWindows![0] + 
expect(w.name).toBe("w") + expect(w.orderBy).toHaveLength(1) + expect(w.frame?.mode).toBe("rows") + } + }) + + it("parses an empty window spec (WindowFunctionTest.testNamedWindowBasic)", () => { + const sql = + "SELECT x, y, row_number() OVER w as row_num FROM t WINDOW w AS ()" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows).toHaveLength(1) + const w = stmt.namedWindows![0] + expect(w.name).toBe("w") + expect(w.baseWindow).toBeUndefined() + expect(w.partitionBy).toBeUndefined() + expect(w.orderBy).toBeUndefined() + expect(w.frame).toBeUndefined() + } + }) + + it("parses multiple named windows separated by commas", () => { + const sql = + "SELECT timestamp, symbol, price, avg(price) OVER short_window AS avg_10, avg(price) OVER long_window AS avg_50 FROM trades WHERE symbol = 'BTC-USDT' WINDOW short_window AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW), long_window AS (ORDER BY timestamp ROWS BETWEEN 49 PRECEDING AND CURRENT ROW) LIMIT 100" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows).toHaveLength(2) + expect(stmt.namedWindows![0].name).toBe("short_window") + expect(stmt.namedWindows![1].name).toBe("long_window") + } + }) + + it("parses mixed inline OVER and named window references", () => { + const sql = + "SELECT timestamp, symbol, price, avg(price) OVER w AS moving_avg, row_number() OVER (PARTITION BY symbol ORDER BY timestamp) AS seq FROM trades WINDOW w AS (ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + }) + + it("parses window inheritance (WINDOW w2 AS (w1 ...))", () => { + const sql = + "SELECT avg(price) OVER w1 AS symbol_avg, avg(price) OVER w2 AS moving_avg FROM trades WHERE symbol = 'BTC-USDT' WINDOW w1 AS (ORDER BY 
timestamp), w2 AS (w1 ROWS BETWEEN 9 PRECEDING AND CURRENT ROW) LIMIT 100" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows).toHaveLength(2) + expect(stmt.namedWindows![0].name).toBe("w1") + expect(stmt.namedWindows![0].baseWindow).toBeUndefined() + expect(stmt.namedWindows![1].name).toBe("w2") + expect(stmt.namedWindows![1].baseWindow).toBe("w1") + expect(stmt.namedWindows![1].frame?.mode).toBe("rows") + } + }) + + it("parses a named window inside a CTE (docs example)", () => { + const sql = + "WITH price_stats AS (SELECT timestamp, symbol, price, avg(price) OVER w AS moving_avg, price - avg(price) OVER w AS deviation FROM trades WHERE symbol = 'BTC-USDT' WINDOW w AS (ORDER BY timestamp ROWS BETWEEN 19 PRECEDING AND CURRENT ROW)) SELECT * FROM price_stats WHERE deviation > 10 LIMIT 100" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + }) + + it("supports PARTITION BY in the window spec", () => { + const sql = + "SELECT avg(price) OVER w FROM trades WINDOW w AS (PARTITION BY symbol ORDER BY timestamp ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + const w = stmt.namedWindows![0] + expect(w.partitionBy).toHaveLength(1) + expect(w.orderBy).toHaveLength(1) + expect(w.frame?.mode).toBe("rows") + } + }) + + it("supports CUMULATIVE frame mode", () => { + const sql = + "SELECT avg(price) OVER w FROM trades WINDOW w AS (ORDER BY ts CUMULATIVE)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows![0].frame?.mode).toBe("cumulative") + } + }) + + it("supports EXCLUDE CURRENT ROW in the frame", () => { + const sql = + "SELECT avg(price) OVER w FROM trades WINDOW w AS (ORDER BY ts ROWS BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW EXCLUDE CURRENT ROW)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows![0].frame?.exclude).toBe("currentRow") + } + }) + + it("allows WINDOW clause before ORDER BY and LIMIT", () => { + const sql = + "SELECT avg(price) OVER w FROM trades WHERE symbol = 'BTC-USDT' WINDOW w AS (ORDER BY ts) ORDER BY symbol LIMIT 100" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const stmt = result.ast[0] + if (stmt.type === "select") { + expect(stmt.namedWindows).toHaveLength(1) + expect(stmt.orderBy).toBeDefined() + expect(stmt.limit).toBeDefined() + } + }) + + it("does not conflict with WINDOW JOIN (FROM clause)", () => { + const sql = + "SELECT t.price + 1, sum(q.price) + 10 FROM trades t WINDOW JOIN quotes q ON tag RANGE BETWEEN 1 SECOND PRECEDING AND CURRENT ROW EXCLUDE PREVAILING" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + }) + + it("round-trips a simple WINDOW clause through toSql", () => { + const sql = + "SELECT avg(price) OVER w FROM trades WINDOW w AS (ORDER BY ts ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const regenerated = toSql(result.ast[0]) + const reparsed = parseToAst(regenerated) + expect(reparsed.errors).toHaveLength(0) + // The AST shape should be preserved across the round trip. 
+ expect(reparsed.ast[0]).toEqual(result.ast[0]) + }) + + it("round-trips window inheritance through toSql", () => { + const sql = + "SELECT avg(price) OVER w2 FROM trades WINDOW w1 AS (ORDER BY ts), w2 AS (w1 ROWS BETWEEN 9 PRECEDING AND CURRENT ROW)" + const result = parseToAst(sql) + expect(result.errors).toHaveLength(0) + const regenerated = toSql(result.ast[0]) + const reparsed = parseToAst(regenerated) + expect(reparsed.errors).toHaveLength(0) + expect(reparsed.ast[0]).toEqual(result.ast[0]) + }) + }) }) diff --git a/tests/recovery.test.ts b/tests/recovery.test.ts index 89e18cf..a2a9a3e 100644 --- a/tests/recovery.test.ts +++ b/tests/recovery.test.ts @@ -340,9 +340,13 @@ describe("Unexpected token recovery", () => { }) it("extra comma in column list", () => { + // QuestDB accepts a trailing comma in the select list, so for + // `SELECT a,, b FROM t` the parser takes the first comma as trailing + // (finishing the first SELECT with column `a`), then tries to recover + // from the orphan `, b FROM t`. A SELECT AST must still come out. const result = parseToAst("SELECT a,, b FROM t") - expect(result.errors.length).toBe(1) - expect(result.ast.length).toBe(1) + expect(result.errors.length).toBeGreaterThanOrEqual(1) + expect(result.ast.length).toBeGreaterThanOrEqual(1) expect(result.ast[0].type).toBe("select") }) diff --git a/yarn.lock b/yarn.lock index abd7e31..418a8c1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -422,7 +422,7 @@ __metadata: "@chevrotain/cst-dts-gen": "npm:^11.1.1" "@eslint/js": "npm:^10.0.1" "@types/node": "npm:^25.2.0" - chevrotain: "npm:^11.1.1" + chevrotain: "npm:11.1.1" eslint: "npm:^10.0.0" eslint-config-prettier: "npm:^10.1.8" eslint-plugin-prettier: "npm:^5.5.5" @@ -1016,7 +1016,7 @@ __metadata: languageName: node linkType: hard -"chevrotain@npm:^11.1.1": +"chevrotain@npm:11.1.1": version: 11.1.1 resolution: "chevrotain@npm:11.1.1" dependencies: