Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions docker/otel-collector/schema/seed/00002_otel_logs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ CREATE TABLE IF NOT EXISTS ${DATABASE}.otel_logs
`ScopeVersion` LowCardinality(String) CODEC(ZSTD(1)),
`ScopeAttributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
`LogAttributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
`ResourceAttributeTokens` Array(String) ALIAS arrayMap((k,v) -> concat(k, '=', v), mapKeys(ResourceAttributes), mapValues(ResourceAttributes)),
`LogAttributeTokens` Array(String) ALIAS arrayMap((k,v) -> concat(k, '=', v), mapKeys(LogAttributes), mapValues(LogAttributes)),
`ScopeAttributeTokens` Array(String) ALIAS arrayMap((k,v) -> concat(k, '=', v), mapKeys(ScopeAttributes), mapValues(ScopeAttributes)),
`__hdx_materialized_k8s.cluster.name` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.cluster.name'] CODEC(ZSTD(1)),
`__hdx_materialized_k8s.container.name` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.container.name'] CODEC(ZSTD(1)),
`__hdx_materialized_k8s.deployment.name` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.deployment.name'] CODEC(ZSTD(1)),
Expand All @@ -27,11 +30,11 @@ CREATE TABLE IF NOT EXISTS ${DATABASE}.otel_logs
`__hdx_materialized_deployment.environment.name` LowCardinality(String) MATERIALIZED ResourceAttributes['deployment.environment.name'] CODEC(ZSTD(1)),
INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_res_attr_kv_text ResourceAttributeTokens TYPE text(tokenizer = 'array'),
INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_scope_attr_kv_text ScopeAttributeTokens TYPE text(tokenizer = 'array'),
INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_log_attr_kv_text LogAttributeTokens TYPE text(tokenizer = 'array'),
INDEX idx_lower_body lower(Body) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8
)
ENGINE = MergeTree
Expand Down
319 changes: 319 additions & 0 deletions packages/common-utils/src/__tests__/queryParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,325 @@ describe('CustomSchemaSQLSerializerV2 - indexCoversColumn', () => {
);
});

describe('CustomSchemaSQLSerializerV2 - Map Tokens Skip-Index Optimization', () => {
  const metadata = getMetadata(
    new ClickhouseClient({ host: 'http://localhost:8123' }),
  );
  // Column-type lookup used by the serializer when resolving field paths.
  // Only the three columns these tests touch are modeled.
  metadata.getColumn = jest.fn().mockImplementation(async ({ column }) => {
    if (column === 'LogAttributes') {
      return {
        name: 'LogAttributes',
        type: 'Map(LowCardinality(String), String)',
      };
    } else if (column === 'ResourceAttributes') {
      return {
        name: 'ResourceAttributes',
        type: 'Map(LowCardinality(String), String)',
      };
    } else if (column === 'Body') {
      return { name: 'Body', type: 'String' };
    }
    return undefined;
  });
  metadata.getMaterializedColumnsLookupTable = jest
    .fn()
    .mockResolvedValue(new Map());
  metadata.getSetting = jest.fn().mockResolvedValue(undefined);

  const databaseName = 'default';
  const tableName = 'otel_logs';
  const connectionId = 'test';

  // --- fixture factories (the seven tests below share these shapes) ---

  // Descriptor for a plain attribute map column, e.g. LogAttributes.
  const mapColumn = (name: string) => ({
    name,
    type: 'Map(LowCardinality(String), String)',
    default_type: '',
    default_expression: '',
  });

  // Descriptor for the ALIAS column that flattens a map into 'key=value' tokens.
  const tokensColumn = (mapName: string, tokensName: string) => ({
    name: tokensName,
    type: 'Array(String)',
    default_type: 'ALIAS',
    default_expression: `arrayMap((k,v) -> concat(k, '=', v), mapKeys(${mapName}), mapValues(${mapName}))`,
  });

  // Descriptor for a text skip-index over a tokens column.
  const textIndex = (indexName: string, tokensName: string) => ({
    name: indexName,
    type: 'text',
    typeFull: "text(tokenizer = 'array')",
    expression: tokensName,
    granularity: 1,
  });

  // Builds a serializer bound to the shared (per-test mutated) metadata mocks.
  const makeSerializer = () =>
    new CustomSchemaSQLSerializerV2({
      metadata,
      databaseName,
      tableName,
      connectionId,
      implicitColumnExpression: 'Body',
    });

  // Installs getColumns/getSkipIndices mocks for a given fixture shape.
  const mockTable = (
    columns: object[],
    skipIndices: object[],
  ) => {
    metadata.getColumns = jest.fn().mockResolvedValue(columns);
    metadata.getSkipIndices = jest.fn().mockResolvedValue(skipIndices);
  };

  it('should use has(TokensColumn, key=value) for exact map matches when tokens column with text index exists', async () => {
    mockTable(
      [
        mapColumn('LogAttributes'),
        tokensColumn('LogAttributes', 'LogAttributeTokens'),
      ],
      [textIndex('idx_log_attr_kv_text', 'LogAttributeTokens')],
    );

    // Exact match: LogAttributes.userId:"abc123"
    const result = await makeSerializer().eq(
      'LogAttributes.userId',
      'abc123',
      false,
      {},
    );
    expect(result).toBe("(has(`LogAttributeTokens`, 'userId=abc123'))");
  });

  it('should fall back to map access for negated exact matches', async () => {
    mockTable(
      [
        mapColumn('LogAttributes'),
        tokensColumn('LogAttributes', 'LogAttributeTokens'),
      ],
      [textIndex('idx_log_attr_kv_text', 'LogAttributeTokens')],
    );

    // Negated: -LogAttributes.userId:"abc123"
    const result = await makeSerializer().eq(
      'LogAttributes.userId',
      'abc123',
      true,
      {},
    );
    expect(result).toBe("(`LogAttributes`['userId'] != 'abc123')");
  });

  it('should fall back to map access when no tokens column exists', async () => {
    mockTable([mapColumn('LogAttributes')], []);

    const result = await makeSerializer().eq(
      'LogAttributes.userId',
      'abc123',
      false,
      {},
    );
    expect(result).toBe(
      "(`LogAttributes`['userId'] = 'abc123' AND indexHint(mapContains(`LogAttributes`, 'userId')))",
    );
  });

  it('should fall back to map access when tokens column exists but has no text index', async () => {
    mockTable(
      [
        mapColumn('LogAttributes'),
        tokensColumn('LogAttributes', 'LogAttributeTokens'),
      ],
      [],
    );

    const result = await makeSerializer().eq(
      'LogAttributes.userId',
      'abc123',
      false,
      {},
    );
    expect(result).toBe(
      "(`LogAttributes`['userId'] = 'abc123' AND indexHint(mapContains(`LogAttributes`, 'userId')))",
    );
  });

  it('should handle values containing equals signs', async () => {
    mockTable(
      [
        mapColumn('LogAttributes'),
        tokensColumn('LogAttributes', 'LogAttributeTokens'),
      ],
      [textIndex('idx_log_attr_kv_text', 'LogAttributeTokens')],
    );

    // Value with = should still work (key doesn't contain =)
    const result = await makeSerializer().eq(
      'LogAttributes.userId',
      'abc=123',
      false,
      {},
    );
    expect(result).toBe("(has(`LogAttributeTokens`, 'userId=abc=123'))");
  });

  it('should use tokens optimization via SearchQueryBuilder for quoted map field searches', async () => {
    mockTable(
      [
        mapColumn('LogAttributes'),
        tokensColumn('LogAttributes', 'LogAttributeTokens'),
      ],
      [textIndex('idx_log_attr_kv_text', 'LogAttributeTokens')],
    );

    // Lucene: LogAttributes.error.message:"Failed to fetch"
    const builder = new SearchQueryBuilder(
      'LogAttributes.error.message:"Failed to fetch"',
      makeSerializer(),
    );
    const sql = await builder.build();
    expect(sql).toBe(
      "((has(`LogAttributeTokens`, 'error.message=Failed to fetch')))",
    );
  });

  it('should work with ResourceAttributes tokens column', async () => {
    mockTable(
      [
        mapColumn('ResourceAttributes'),
        tokensColumn('ResourceAttributes', 'ResourceAttributeTokens'),
      ],
      [textIndex('idx_res_attr_kv_text', 'ResourceAttributeTokens')],
    );

    const result = await makeSerializer().eq(
      'ResourceAttributes.service.name',
      'my-service',
      false,
      {},
    );
    expect(result).toBe(
      "(has(`ResourceAttributeTokens`, 'service.name=my-service'))",
    );
  });
});

describe('CustomSchemaSQLSerializerV2 - Array and Nested Fields', () => {
const metadata = getMetadata(
new ClickhouseClient({ host: 'http://localhost:8123' }),
Expand Down
Loading
Loading