Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 231 additions & 1 deletion src/parse-utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
import { CHAR_ASTERISK, CHAR_FORWARD_SLASH, is_whitespace } from './string-utils'
import {
CHAR_ASTERISK,
CHAR_BACKSLASH,
CHAR_CARRIAGE_RETURN,
CHAR_DOUBLE_QUOTE,
CHAR_FORWARD_SLASH,
CHAR_LEFT_BRACE,
CHAR_LEFT_PAREN,
CHAR_NEWLINE,
CHAR_RIGHT_PAREN,
CHAR_SEMICOLON,
CHAR_SINGLE_QUOTE,
is_whitespace,
} from './string-utils'

/**
* Skip whitespace forward from a position
Expand Down Expand Up @@ -134,3 +147,220 @@ export function trim_boundaries(
if (start >= end) return null
return [start, end]
}

/**
 * Raw character scan to locate the opening `{` of a CSS block, without full
 * tokenization. Comments (`/* ... *\/`), quoted strings and backslash escapes
 * are skipped so that a `{` inside any of them is not mistaken for the block
 * opener. Every newline passed over (`\n`, `\r` or `\r\n`, including ones
 * inside comments, inside strings and directly after a backslash) advances
 * the line-tracking state so the caller can reposition a Lexer afterward.
 *
 * @param source - The text to scan
 * @param pos - Index in `source` at which scanning starts
 * @param line - Line number that `pos` is on
 * @param line_offset - Index in `source` of the first character of that line
 * @returns `[pos, line, line_offset]` where `pos` is the index of `{`, or
 *   `source.length` if no `{` was found, and `line`/`line_offset` are the
 *   line-tracking state at that position
 * @internal
 */
export function scan_to_open_brace(
	source: string,
	pos: number,
	line: number,
	line_offset: number,
): [number, number, number] {
	const len = source.length
	let i = pos

	while (i < len) {
		const ch = source.charCodeAt(i)

		if (ch === CHAR_LEFT_BRACE) return [i, line, line_offset]

		// Comments: /* ... */
		if (ch === CHAR_FORWARD_SLASH && i + 1 < len && source.charCodeAt(i + 1) === CHAR_ASTERISK) {
			i += 2
			while (i < len) {
				const c = source.charCodeAt(i)
				if (c === CHAR_ASTERISK && i + 1 < len && source.charCodeAt(i + 1) === CHAR_FORWARD_SLASH) {
					i += 2
					break
				}
				if (c === CHAR_NEWLINE) {
					line++
					line_offset = i + 1
				} else if (c === CHAR_CARRIAGE_RETURN) {
					line++
					// Treat \r\n as a single line break
					if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
					line_offset = i + 1
				}
				i++
			}
			continue
		}

		// Strings: '...' or "..."
		if (ch === CHAR_SINGLE_QUOTE || ch === CHAR_DOUBLE_QUOTE) {
			const quote = ch
			i++
			while (i < len) {
				let c = source.charCodeAt(i)
				if (c === quote) {
					i++
					break
				}
				if (c === CHAR_BACKSLASH) {
					// Step onto the escaped character. A trailing backslash at the
					// very end of the input has nothing to escape: stop here so
					// the returned position never exceeds source.length.
					i++
					if (i >= len) break
					// The escaped character can never close the string, but an
					// escaped newline (line continuation) still starts a new
					// line, so let it fall through to the newline tracking below.
					c = source.charCodeAt(i)
				}
				if (c === CHAR_NEWLINE) {
					line++
					line_offset = i + 1
				} else if (c === CHAR_CARRIAGE_RETURN) {
					line++
					if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
					line_offset = i + 1
				}
				i++
			}
			continue
		}

		// Backslash escape outside strings (e.g. \{ in a selector)
		if (ch === CHAR_BACKSLASH && i + 1 < len) {
			i++
			const next = source.charCodeAt(i)
			if (next === CHAR_NEWLINE) {
				line++
				line_offset = i + 1
			} else if (next === CHAR_CARRIAGE_RETURN) {
				line++
				if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
				line_offset = i + 1
			}
			i++
			continue
		}

		if (ch === CHAR_NEWLINE) {
			line++
			line_offset = i + 1
		} else if (ch === CHAR_CARRIAGE_RETURN) {
			line++
			if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
			line_offset = i + 1
		}

		i++
	}

	// No `{` found: `i` is source.length here
	return [i, line, line_offset]
}

/**
 * Raw character scan to locate the end of an at-rule prelude: `{` or `;`
 * (whichever comes first at parenthesis depth 0). Comments, quoted strings
 * and backslash escapes are skipped, and parenthesis depth is tracked so that
 * semicolons inside `url(data:...;...)` are not treated as the boundary.
 * Every newline passed over (`\n`, `\r` or `\r\n`, including ones inside
 * comments, inside strings and directly after a backslash) advances the
 * line-tracking state so the caller can reposition a Lexer afterward.
 *
 * @param source - The text to scan
 * @param pos - Index in `source` at which scanning starts
 * @param line - Line number that `pos` is on
 * @param line_offset - Index in `source` of the first character of that line
 * @returns `[pos, line, line_offset]` where `pos` is the index of the
 *   boundary character, or `source.length` if none was found, and
 *   `line`/`line_offset` are the line-tracking state at that position
 * @internal
 */
export function scan_to_block_or_semi(
	source: string,
	pos: number,
	line: number,
	line_offset: number,
): [number, number, number] {
	const len = source.length
	let i = pos
	let depth = 0

	while (i < len) {
		const ch = source.charCodeAt(i)

		// `{` and `;` only end the prelude outside of all parentheses
		if (depth === 0 && (ch === CHAR_LEFT_BRACE || ch === CHAR_SEMICOLON)) {
			return [i, line, line_offset]
		}

		if (ch === CHAR_LEFT_PAREN) {
			depth++
			i++
			continue
		}
		if (ch === CHAR_RIGHT_PAREN) {
			// Ignore stray `)` so the depth never goes negative
			if (depth > 0) depth--
			i++
			continue
		}

		// Comments: /* ... */
		if (ch === CHAR_FORWARD_SLASH && i + 1 < len && source.charCodeAt(i + 1) === CHAR_ASTERISK) {
			i += 2
			while (i < len) {
				const c = source.charCodeAt(i)
				if (c === CHAR_ASTERISK && i + 1 < len && source.charCodeAt(i + 1) === CHAR_FORWARD_SLASH) {
					i += 2
					break
				}
				if (c === CHAR_NEWLINE) {
					line++
					line_offset = i + 1
				} else if (c === CHAR_CARRIAGE_RETURN) {
					line++
					// Treat \r\n as a single line break
					if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
					line_offset = i + 1
				}
				i++
			}
			continue
		}

		// Strings: '...' or "..."
		if (ch === CHAR_SINGLE_QUOTE || ch === CHAR_DOUBLE_QUOTE) {
			const quote = ch
			i++
			while (i < len) {
				let c = source.charCodeAt(i)
				if (c === quote) {
					i++
					break
				}
				if (c === CHAR_BACKSLASH) {
					// Step onto the escaped character. A trailing backslash at the
					// very end of the input has nothing to escape: stop here so
					// the returned position never exceeds source.length.
					i++
					if (i >= len) break
					// The escaped character can never close the string, but an
					// escaped newline (line continuation) still starts a new
					// line, so let it fall through to the newline tracking below.
					c = source.charCodeAt(i)
				}
				if (c === CHAR_NEWLINE) {
					line++
					line_offset = i + 1
				} else if (c === CHAR_CARRIAGE_RETURN) {
					line++
					if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
					line_offset = i + 1
				}
				i++
			}
			continue
		}

		// Backslash escape outside strings
		if (ch === CHAR_BACKSLASH && i + 1 < len) {
			i++
			const next = source.charCodeAt(i)
			if (next === CHAR_NEWLINE) {
				line++
				line_offset = i + 1
			} else if (next === CHAR_CARRIAGE_RETURN) {
				line++
				if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
				line_offset = i + 1
			}
			i++
			continue
		}

		if (ch === CHAR_NEWLINE) {
			line++
			line_offset = i + 1
		} else if (ch === CHAR_CARRIAGE_RETURN) {
			line++
			if (i + 1 < len && source.charCodeAt(i + 1) === CHAR_NEWLINE) i++
			line_offset = i + 1
		}

		i++
	}

	// No boundary found: `i` is source.length here
	return [i, line, line_offset]
}
75 changes: 44 additions & 31 deletions src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@ import {
TOKEN_RIGHT_BRACKET,
TOKEN_COMMA,
TOKEN_COLON,
TOKEN_FUNCTION,
} from './token-types'
import { trim_boundaries } from './parse-utils'
import {
trim_boundaries,
scan_to_open_brace,
scan_to_block_or_semi,
skip_whitespace_and_comments_backward,
} from './parse-utils'
import {
CHAR_PERIOD,
CHAR_GREATER_THAN,
Expand Down Expand Up @@ -275,12 +279,22 @@ export class Parser {
let selector_line = this.lexer.token_line
let selector_column = this.lexer.token_column

// Consume tokens until we hit '{'
let last_end = this.lexer.token_end
while (!this.is_eof() && this.peek_type() !== TOKEN_LEFT_BRACE) {
last_end = this.lexer.token_end
this.next_token()
}
// Raw scan to find '{' without tokenizing the selector range.
// SelectorParser will be the sole tokenizer of this range.
let [brace_pos, brace_line, brace_line_offset] = scan_to_open_brace(
this.source,
selector_start,
selector_line,
selector_start - selector_column + 1,
)

// Trim trailing whitespace/comments before '{' to get the true selector end,
// matching what the old token-scan loop produced via next_token_fast(true).
let last_end = skip_whitespace_and_comments_backward(this.source, brace_pos, selector_start)

// Reposition main lexer at '{' and read it as a token
this.lexer.seek(brace_pos, brace_line, brace_pos - brace_line_offset + 1)
this.lexer.next_token_fast(false)

// If detailed selector parsing is enabled, use SelectorParser
if (this.parse_selectors_enabled && this.selector_parser) {
Expand All @@ -297,14 +311,13 @@ export class Parser {

// Create node: RAW when parsing disabled, SELECTOR_LIST as error fallback
let node_type = this.parse_selectors_enabled ? SELECTOR_LIST : RAW
let selector_node = this.arena.create_node(
return this.arena.create_node(
node_type,
selector_start,
last_end - selector_start,
selector_line,
selector_column,
)
return selector_node
}

// Parse a declaration: property: value;
Expand Down Expand Up @@ -383,28 +396,28 @@ export class Parser {

// Track prelude start and end
let prelude_start = this.lexer.token_start
let prelude_end = prelude_start
// Track parenthesis depth to handle semicolons inside functions (e.g., url(data:image/png;base64,...))
// NOTE: Same pattern exists in parse-declaration.ts for value parsing - keep in sync
let paren_depth = 0

// Parse prelude (everything before '{' or ';')
while (!this.is_eof()) {
let token_type = this.peek_type()

// Track parenthesis depth
if (token_type === TOKEN_LEFT_PAREN || token_type === TOKEN_FUNCTION) {
paren_depth++
} else if (token_type === TOKEN_RIGHT_PAREN) {
paren_depth--
}
let prelude_line = this.lexer.token_line
let prelude_column = this.lexer.token_column

// Raw scan to find '{' or ';' without tokenizing the prelude range.
// AtRulePreludeParser will be the sole tokenizer of this range.
// Paren depth is tracked to correctly skip semicolons inside url(...).
let [boundary_pos, boundary_line, boundary_line_offset] = scan_to_block_or_semi(
this.source,
prelude_start,
prelude_line,
prelude_start - prelude_column + 1,
)
// Trim trailing whitespace/comments before the boundary to match the old loop
let prelude_end = skip_whitespace_and_comments_backward(
this.source,
boundary_pos,
prelude_start,
)

// Only break on '{' or ';' when outside all parentheses
if (token_type === TOKEN_LEFT_BRACE && paren_depth === 0) break
if (token_type === TOKEN_SEMICOLON && paren_depth === 0) break
prelude_end = this.lexer.token_end
this.next_token()
}
// Reposition main lexer at '{' or ';' and read it as a token
this.lexer.seek(boundary_pos, boundary_line, boundary_pos - boundary_line_offset + 1)
this.lexer.next_token_fast(false)

// Store prelude position (trimmed)
let trimmed = trim_boundaries(this.source, prelude_start, prelude_end)
Expand Down
5 changes: 5 additions & 0 deletions src/string-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ export const CHAR_DOLLAR = 0x24 // $
export const CHAR_CARET = 0x5e // ^
export const CHAR_COLON = 0x3a // :
export const CHAR_LESS_THAN = 0x3c // <
export const CHAR_SEMICOLON = 0x3b // ;
export const CHAR_LEFT_BRACE = 0x7b // {
export const CHAR_LEFT_PAREN = 0x28 // (
export const CHAR_RIGHT_PAREN = 0x29 // )
export const CHAR_BACKSLASH = 0x5c // \

/**
* Check if a character code is whitespace (space, tab, newline, CR, or FF)
Expand Down
Loading