diff --git a/.changeset/tidy-pets-shout.md b/.changeset/tidy-pets-shout.md new file mode 100644 index 0000000000..1edc7eaa4a --- /dev/null +++ b/.changeset/tidy-pets-shout.md @@ -0,0 +1,6 @@ +--- +'@hyperdx/api': patch +'@hyperdx/app': patch +--- + +fix: use block_number/block_offset to uniquely identify log rows diff --git a/docker/otel-collector/schema/seed/00002_otel_logs.sql b/docker/otel-collector/schema/seed/00002_otel_logs.sql index 5cc414ed76..3f7fb36669 100644 --- a/docker/otel-collector/schema/seed/00002_otel_logs.sql +++ b/docker/otel-collector/schema/seed/00002_otel_logs.sql @@ -39,5 +39,5 @@ PARTITION BY toDate(TimestampTime) PRIMARY KEY (ServiceName, TimestampTime) ORDER BY (ServiceName, TimestampTime, Timestamp) TTL TimestampTime + ${TABLES_TTL} -SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1; +SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, enable_block_number_column = 1, enable_block_offset_column = 1; diff --git a/packages/api/src/models/source.ts b/packages/api/src/models/source.ts index e95ee92072..2646cd2997 100644 --- a/packages/api/src/models/source.ts +++ b/packages/api/src/models/source.ts @@ -133,7 +133,6 @@ export const LogSource = Source.discriminator( traceIdExpression: String, spanIdExpression: String, implicitColumnExpression: String, - uniqueRowIdExpression: String, /** @deprecated See LogSourceSchema in @hyperdx/common-utils/types.ts. */ tableFilterExpression: String, highlightedTraceAttributeExpressions: { diff --git a/packages/app/src/components/DBRowTable.tsx b/packages/app/src/components/DBRowTable.tsx index cdfb5c56c5..ba7e8bed41 100644 --- a/packages/app/src/components/DBRowTable.tsx +++ b/packages/app/src/components/DBRowTable.tsx @@ -75,7 +75,7 @@ import { useRenderedSqlChartConfig, } from '@/hooks/useChartConfig'; import { useCsvExport } from '@/hooks/useCsvExport'; -import { useTableMetadata } from '@/hooks/useMetadata'; +import { useColumns, useTableMetadata } from '@/hooks/useMetadata'; import useOffsetPaginatedQuery from '@/hooks/useOffsetPaginatedQuery'; import { useGroupedPatterns } from '@/hooks/usePatterns'; import useRowWhere, { @@ -1362,14 +1362,15 @@ export const RawLogTable = memo( }, ); -export function appendSelectWithPrimaryAndPartitionKey( +export function appendSelectWithAdditionalKeys( select: SelectList, primaryKeys: string, partitionKey: string, + extraKeys: string[] = [], ): { select: SelectList; additionalKeysLength: number } { const partitionKeyArr = extractColumnReferencesFromKey(partitionKey); const primaryKeyArr = extractColumnReferencesFromKey(primaryKeys); - const allKeys = new Set([...partitionKeyArr, ...primaryKeyArr]); + const allKeys = new Set([...partitionKeyArr, ...primaryKeyArr, ...extraKeys]); if (typeof select === 'string') { const selectSplit = splitAndTrimWithBracket(select); const selectColumns = new Set(selectSplit); @@ -1395,8 +1396,9 @@ function getSelectLength(select: SelectList): number { } } -export function useConfigWithPrimaryAndPartitionKey( +export function useConfigWithAdditionalSelect( config: BuilderChartConfigWithDateRange, + sourceId?: string, ) { const { data: tableMetadata } = useTableMetadata({ databaseName: config.from.databaseName, @@ -1404,24 +1406,50 @@ export function useConfigWithPrimaryAndPartitionKey( connectionId: config.connection, }); + // Only check for row-ID columns for row-level queries (sourceId present). + // Skip for aggregate queries (e.g. patterns) where extra keys are irrelevant. + const { data: columns } = useColumns( + { + databaseName: config.from.databaseName, + tableName: config.from.tableName, + connectionId: config.connection, + }, + { enabled: !!sourceId }, + ); + const primaryKey = tableMetadata?.primary_key; const partitionKey = tableMetadata?.partition_key; - const mergedConfig = useMemo(() => { + return useMemo(() => { if (primaryKey == null || partitionKey == null) { return undefined; } - const { select, additionalKeysLength } = - appendSelectWithPrimaryAndPartitionKey( - config.select, - primaryKey, - partitionKey, - ); - return { ...config, select, additionalKeysLength }; - }, [primaryKey, partitionKey, config]); + let extraKeys: string[] = []; + + if (sourceId) { + const engineFull = tableMetadata?.engine_full ?? ''; - return mergedConfig; + const hasBlockColumns = + engineFull.includes('enable_block_number_column = 1') && + engineFull.includes('enable_block_offset_column = 1'); + + if (hasBlockColumns) { + extraKeys = ['_block_number', '_block_offset']; + } else if (columns?.some(c => c.name === '__hdx_id')) { + extraKeys = ['__hdx_id']; + } + } + + const { select, additionalKeysLength } = appendSelectWithAdditionalKeys( + config.select, + primaryKey, + partitionKey, + extraKeys, + ); + + return { ...config, select, additionalKeysLength }; + }, [primaryKey, partitionKey, config, tableMetadata, columns, sourceId]); } function selectColumnMapWithoutAdditionalKeys( @@ -1552,7 +1580,7 @@ function DBSqlRowTableComponent({ return base; }, [me, config, orderByArray]); - const mergedConfig = useConfigWithPrimaryAndPartitionKey(mergedConfigObj); + const mergedConfig = useConfigWithAdditionalSelect(mergedConfigObj, sourceId); const { data, fetchNextPage, hasNextPage, isFetching, isError, error } = useOffsetPaginatedQuery(mergedConfig ?? config, { diff --git a/packages/app/src/components/Sources/SourceForm.tsx b/packages/app/src/components/Sources/SourceForm.tsx index 57186fe4f4..47600e9c38 100644 --- a/packages/app/src/components/Sources/SourceForm.tsx +++ b/packages/app/src/components/Sources/SourceForm.tsx @@ -1256,21 +1256,6 @@ function LogTableModelForm(props: TableModelProps) { - {/* - - */} {/* { }); }); -describe('appendSelectWithPrimaryAndPartitionKey', () => { +describe('appendSelectWithAdditionalKeys', () => { it('should extract columns from partition key with nested function call', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', 'id, created_at', ' toStartOfInterval(timestamp, toIntervalDay(3))', @@ -159,7 +159,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract no columns from empty primary key and partition key', () => { - const result = appendSelectWithPrimaryAndPartitionKey('col1, col2', '', ''); + const result = appendSelectWithAdditionalKeys('col1, col2', '', '', []); expect(result).toEqual({ additionalKeysLength: 0, select: 'col1,col2', @@ -167,7 +167,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract columns from complex primary key', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', 'id, timestamp, toStartOfInterval(timestamp2, toIntervalDay(3))', "toStartOfInterval(timestamp, toIntervalDay(3)), date_diff('DAY', col3, col4), now(), toDate(col5 + INTERVAL 1 DAY)", @@ -179,7 +179,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract map columns', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `map['key']`, `map2['key'], map1['key3 ']`, @@ -191,7 +191,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract map columns', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', ``, `map2['key.2']`, @@ -203,7 +203,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract array columns', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `array[1]`, `array[2], array[3]`, @@ -215,7 +215,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract json columns', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `json.b`, `json.a, json.b.c, toStartOfDay(timestamp, json_2.d)`, @@ -227,7 +227,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should extract json columns with type specifiers', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `json.b.:Int64`, `toStartOfDay(json.a.b.:DateTime)`, @@ -239,7 +239,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should skip json columns with hard-to-parse type specifiers', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `json.b.:Array(String), col3`, ``, @@ -251,7 +251,7 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { }); it('should skip nested map references', () => { - const result = appendSelectWithPrimaryAndPartitionKey( + const result = appendSelectWithAdditionalKeys( 'col1, col2', `map['key']['key2'], col3`, ``, @@ -261,4 +261,53 @@ describe('appendSelectWithPrimaryAndPartitionKey', () => { select: `col1,col2,col3`, }); }); + + it('should append extraKeys to string select', () => { + const result = appendSelectWithAdditionalKeys('col1, col2', 'id', '', [ + '__hdx_id', + ]); + expect(result).toEqual({ + additionalKeysLength: 2, + select: 'col1,col2,id,__hdx_id', + }); + }); + + it('should not duplicate extraKeys already in select', () => { + const result = appendSelectWithAdditionalKeys('col1, __hdx_id', 'id', '', [ + '__hdx_id', + ]); + expect(result).toEqual({ + additionalKeysLength: 1, + select: 'col1,__hdx_id,id', + }); + }); + + it('should deduplicate extraKeys that overlap with primary/partition keys', () => { + const result = appendSelectWithAdditionalKeys('col1, col2', 'id', '', [ + 'id', + '__hdx_id', + ]); + expect(result).toEqual({ + additionalKeysLength: 2, + select: 'col1,col2,id,__hdx_id', + }); + }); + + it('should append extraKeys to array-style select', () => { + const result = appendSelectWithAdditionalKeys( + [{ valueExpression: 'col1' }, { valueExpression: 'col2' }], + 'id', + '', + ['__hdx_id'], + ); + expect(result).toEqual({ + additionalKeysLength: 2, + select: [ + { valueExpression: 'col1' }, + { valueExpression: 'col2' }, + { valueExpression: 'id' }, + { valueExpression: '__hdx_id' }, + ], + }); + }); }); diff --git a/packages/app/src/hooks/usePatterns.tsx b/packages/app/src/hooks/usePatterns.tsx index 476581eb9e..6911e347f9 100644 --- a/packages/app/src/hooks/usePatterns.tsx +++ b/packages/app/src/hooks/usePatterns.tsx @@ -5,7 +5,7 @@ import { BuilderChartConfigWithDateRange } from '@hyperdx/common-utils/dist/type import { useQuery } from '@tanstack/react-query'; import { timeBucketByGranularity, toStartOfInterval } from '@/ChartUtils'; -import { useConfigWithPrimaryAndPartitionKey } from '@/components/DBRowTable'; +import { useConfigWithAdditionalSelect } from '@/components/DBRowTable'; import { useQueriedChartConfig } from '@/hooks/useChartConfig'; import { getFirstTimestampValueExpression } from '@/source'; @@ -134,7 +134,7 @@ function usePatterns({ statusCodeExpression?: string; enabled?: boolean; }) { - const configWithPrimaryAndPartitionKey = useConfigWithPrimaryAndPartitionKey({ + const configWithPrimaryAndPartitionKey = useConfigWithAdditionalSelect({ ...config, // TODO: User-configurable pattern columns and non-pattern/group by columns select: [ diff --git a/packages/common-utils/src/types.ts b/packages/common-utils/src/types.ts index 72f5239631..8f0d2e2956 100644 --- a/packages/common-utils/src/types.ts +++ b/packages/common-utils/src/types.ts @@ -1147,7 +1147,6 @@ export const LogSourceSchema = BaseSourceSchema.extend({ traceIdExpression: z.string().optional(), spanIdExpression: z.string().optional(), implicitColumnExpression: z.string().optional(), - uniqueRowIdExpression: z.string().optional(), /** * @deprecated Application-side SQL predicate AND'd into every query against * the source. Not a security boundary — bypassable by direct table SELECT.