diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/AlpEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/AlpEncodingDecoderTest.java new file mode 100644 index 00000000..f2778af2 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/AlpEncodingDecoderTest.java @@ -0,0 +1,174 @@ +package io.github.dfa1.vortex.reader.decode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.proto.ALPMetadata; +import io.github.dfa1.vortex.proto.PatchesMetadata; +import io.github.dfa1.vortex.reader.ReadRegistry; +import io.github.dfa1.vortex.reader.array.DoubleArray; +import io.github.dfa1.vortex.reader.array.FloatArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assertions.within; + +class AlpEncodingDecoderTest { + + private static final AlpEncodingDecoder SUT = new AlpEncodingDecoder(); + private static final ReadRegistry REGISTRY = TestRegistry.ofDecoders(SUT, new PrimitiveEncodingDecoder()); + + private static final DType F64 = new DType.Primitive(PType.F64, false); + private static final DType F32 = new DType.Primitive(PType.F32, false); + + private static MemorySegment leLongs(long... vs) { + byte[] b = new byte[vs.length * 8]; + ByteBuffer bb = ByteBuffer.wrap(b).order(ByteOrder.LITTLE_ENDIAN); + for (long v : vs) { + bb.putLong(v); + } + return MemorySegment.ofArray(b); + } + + private static MemorySegment leInts(int... 
vs) { + byte[] b = new byte[vs.length * 4]; + ByteBuffer bb = ByteBuffer.wrap(b).order(ByteOrder.LITTLE_ENDIAN); + for (int v : vs) { + bb.putInt(v); + } + return MemorySegment.ofArray(b); + } + + private static MemorySegment leDoubles(double... vs) { + byte[] b = new byte[vs.length * 8]; + ByteBuffer bb = ByteBuffer.wrap(b).order(ByteOrder.LITTLE_ENDIAN); + for (double v : vs) { + bb.putDouble(v); + } + return MemorySegment.ofArray(b); + } + + @Test + void accepts_floatsTrue_otherFalse() { + // Given / When / Then + assertThat(SUT.accepts(F64)).isTrue(); + assertThat(SUT.accepts(F32)).isTrue(); + assertThat(SUT.accepts(new DType.Primitive(PType.I64, false))).isFalse(); + assertThat(SUT.accepts(new DType.Utf8(false))).isFalse(); + } + + @Test + void decode_nonPrimitiveDtype_throws() { + // Given a Utf8 dtype on an ALP node + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, ByteBuffer.wrap(new ALPMetadata(0, 0, null).encode()), + new ArrayNode[0], new int[0]); + DecodeContext ctx = new DecodeContext(node, new DType.Utf8(false), 1, + new MemorySegment[0], REGISTRY, Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(ctx)).hasMessageContaining("expected primitive dtype"); + } + + @Test + void decode_missingMetadata_defaultsToZeroExponents() { + // Given no metadata — decoder falls back to exp_e=0, exp_f=0 (scale 1.0) + ArrayNode enc = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, null, new ArrayNode[]{enc}, new int[0]); + DecodeContext ctx = new DecodeContext(node, F64, 2, new MemorySegment[]{leLongs(5L, 7L)}, REGISTRY, Arena.ofAuto()); + + // When + DoubleArray result = (DoubleArray) SUT.decode(ctx); + + // Then + assertThat(result.getDouble(0)).isCloseTo(5.0, within(1e-9)); + assertThat(result.getDouble(1)).isCloseTo(7.0, within(1e-9)); + } + + @Test + void decode_f64_broadcastNoPatches_returnsConstant() { + // Given a single encoded value but 4 
logical rows (capacity < n) and no patches: + // the decoder broadcasts it into a constant array + ArrayNode enc = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + byte[] meta = new ALPMetadata(2, 0, null).encode(); // exp_e=2 -> *0.01 + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, ByteBuffer.wrap(meta), new ArrayNode[]{enc}, new int[0]); + DecodeContext ctx = new DecodeContext(node, F64, 4, new MemorySegment[]{leLongs(123L)}, REGISTRY, Arena.ofAuto()); + + // When + DoubleArray result = (DoubleArray) SUT.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(4); + for (int i = 0; i < 4; i++) { + assertThat(result.getDouble(i)).as("index %d", i).isCloseTo(1.23, within(1e-9)); + } + } + + @Test + void decode_f32_broadcastNoPatches_returnsConstant() { + // Given single value, 3 rows, no patches + ArrayNode enc = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + byte[] meta = new ALPMetadata(1, 0, null).encode(); // exp_e=1 -> *0.1 + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, ByteBuffer.wrap(meta), new ArrayNode[]{enc}, new int[0]); + DecodeContext ctx = new DecodeContext(node, F32, 3, new MemorySegment[]{leInts(25)}, REGISTRY, Arena.ofAuto()); + + // When + FloatArray result = (FloatArray) SUT.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(3); + for (int i = 0; i < 3; i++) { + assertThat(result.getFloat(i)).as("index %d", i).isCloseTo(2.5f, within(1e-6f)); + } + } + + @Test + void decode_f64_patches_withU8Indices() { + // Given patches whose index child uses U8 storage — exercises the U8 arm of + // readUnsigned (the encoder always emits U32 indices) + PatchesMetadata pm = new PatchesMetadata(1L, 0L, io.github.dfa1.vortex.proto.PType.U8, null, null, null); + byte[] meta = new ALPMetadata(2, 0, pm).encode(); // *0.01 + + ArrayNode enc = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode idx = 
ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode val = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{2}); + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, ByteBuffer.wrap(meta), + new ArrayNode[]{enc, idx, val}, new int[0]); + + MemorySegment idxSeg = MemorySegment.ofArray(new byte[]{1}); // patch row 1 + MemorySegment[] segs = {leLongs(100L, 0L, 300L), idxSeg, leDoubles(9.0)}; + DecodeContext ctx = new DecodeContext(node, F64, 3, segs, REGISTRY, Arena.ofAuto()); + + // When + DoubleArray result = (DoubleArray) SUT.decode(ctx); + + // Then + assertThat(result.getDouble(0)).isCloseTo(1.0, within(1e-9)); + assertThat(result.getDouble(1)).isCloseTo(9.0, within(1e-9)); // patched + assertThat(result.getDouble(2)).isCloseTo(3.0, within(1e-9)); + } + + @Test + void decode_patches_nonUnsignedIndexPtype_throws() { + // Given a signed (I32) patch-index ptype — readUnsigned rejects it + PatchesMetadata pm = new PatchesMetadata(1L, 0L, io.github.dfa1.vortex.proto.PType.I32, null, null, null); + byte[] meta = new ALPMetadata(2, 0, pm).encode(); + + ArrayNode enc = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode idx = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode val = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{2}); + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ALP, ByteBuffer.wrap(meta), + new ArrayNode[]{enc, idx, val}, new int[0]); + + MemorySegment[] segs = {leLongs(100L, 0L), leInts(1), leDoubles(9.0)}; + DecodeContext ctx = new DecodeContext(node, F64, 2, segs, REGISTRY, Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(ctx)).hasMessageContaining("non-unsigned patch index ptype"); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DateTimePartsEncodingDecoderTest.java 
b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DateTimePartsEncodingDecoderTest.java new file mode 100644 index 00000000..928e9d2f --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DateTimePartsEncodingDecoderTest.java @@ -0,0 +1,133 @@ +package io.github.dfa1.vortex.reader.decode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.TestSegments; +import io.github.dfa1.vortex.encoding.TimeUnit; +import io.github.dfa1.vortex.proto.DateTimePartsMetadata; +import io.github.dfa1.vortex.reader.ReadRegistry; +import io.github.dfa1.vortex.reader.array.LongArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class DateTimePartsEncodingDecoderTest { + + private static final DateTimePartsEncodingDecoder SUT = new DateTimePartsEncodingDecoder(); + private static final ReadRegistry REGISTRY = TestRegistry.ofDecoders(SUT, new PrimitiveEncodingDecoder()); + + private static final long SECONDS_PER_DAY = 86_400L; + + private static ByteBuffer i64Meta() { + return ByteBuffer.wrap(new DateTimePartsMetadata( + io.github.dfa1.vortex.proto.PType.I64, + io.github.dfa1.vortex.proto.PType.I64, + io.github.dfa1.vortex.proto.PType.I64).encode()); + } + + private static DType timestampDType(TimeUnit unit, boolean nullable) { + ByteBuffer meta = ByteBuffer.allocate(3).order(ByteOrder.LITTLE_ENDIAN); + meta.put((byte) unit.ordinal()); + meta.putShort((short) 0); + meta.flip(); + return new DType.Extension("vortex.timestamp", + new DType.Primitive(PType.I64, nullable), meta, nullable); + } + + /// Builds a context with three I64 part-children backed by the given segments. 
+ private static DecodeContext ctx(ByteBuffer meta, DType dtype, long n, + MemorySegment days, MemorySegment seconds, MemorySegment subseconds) { + ArrayNode d = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode s = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode ss = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{2}); + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_DATETIMEPARTS, meta, new ArrayNode[]{d, s, ss}, new int[0]); + return new DecodeContext(node, dtype, n, new MemorySegment[]{days, seconds, subseconds}, REGISTRY, Arena.ofAuto()); + } + + @Test + void encodingId_isVortexDateTimeParts() { + // Given / When / Then + assertThat(SUT.encodingId()).isEqualTo(EncodingId.VORTEX_DATETIMEPARTS); + } + + @Test + void decode_missingMetadata_throws() { + // Given a node with no metadata + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_DATETIMEPARTS, null, new ArrayNode[0], new int[0]); + DecodeContext c = new DecodeContext(node, timestampDType(TimeUnit.Milliseconds, false), 1, + new MemorySegment[0], REGISTRY, Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(c)).hasMessageContaining("missing metadata"); + } + + @Test + void decode_milliseconds_reassemblesParts() { + // Given 1 day + 1h2m3s + 456ms split across the three parts + long ts = 86_400_000L + 3723L * 1000L + 456L; + DecodeContext c = ctx(i64Meta(), timestampDType(TimeUnit.Milliseconds, false), 1, + TestSegments.leLongs(1L), TestSegments.leLongs(3723L), TestSegments.leLongs(456L)); + + // When + LongArray result = (LongArray) SUT.decode(c); + + // Then + assertThat(result.getLong(0)).isEqualTo(ts); + } + + @Test + void decode_daysUnit_usesUnitsPerSecondOne() { + // Given a Days-unit extension: divisor() throws for Days, so the decoder must + // special-case it to unitsPerSecond=1 (days only, sub-day parts zero) + DecodeContext c = ctx(i64Meta(), 
timestampDType(TimeUnit.Days, false), 1, + TestSegments.leLongs(2L), TestSegments.leLongs(0L), TestSegments.leLongs(0L)); + + // When + LongArray result = (LongArray) SUT.decode(c); + + // Then 2 days * 86400 s/day * 1 unit/s + assertThat(result.getLong(0)).isEqualTo(2L * SECONDS_PER_DAY); + } + + @Test + void decode_nullableExtension_decodesNullableDaysChild() { + // Given a nullable extension dtype — the days child is decoded as nullable + DecodeContext c = ctx(i64Meta(), timestampDType(TimeUnit.Milliseconds, true), 1, + TestSegments.leLongs(0L), TestSegments.leLongs(0L), TestSegments.leLongs(0L)); + + // When + LongArray result = (LongArray) SUT.decode(c); + + // Then + assertThat(result.getLong(0)).isZero(); + } + + @Test + void decode_extensionMissingTimeUnitMetadata_throws() { + // Given an extension whose metadata byte is absent + DType noUnit = new DType.Extension("vortex.timestamp", + new DType.Primitive(PType.I64, false), null, false); + DecodeContext c = ctx(i64Meta(), noUnit, 1, + TestSegments.leLongs(0L), TestSegments.leLongs(0L), TestSegments.leLongs(0L)); + + // When / Then + assertThatThrownBy(() -> SUT.decode(c)).hasMessageContaining("missing TimeUnit metadata"); + } + + @Test + void decode_nonExtensionDtype_throws() { + // Given a primitive (non-extension) logical type + DecodeContext c = ctx(i64Meta(), new DType.Primitive(PType.I64, false), 1, + TestSegments.leLongs(0L), TestSegments.leLongs(0L), TestSegments.leLongs(0L)); + + // When / Then + assertThatThrownBy(() -> SUT.decode(c)).hasMessageContaining("expected Extension dtype"); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoderTest.java new file mode 100644 index 00000000..688b8b73 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoderTest.java @@ -0,0 +1,103 @@ +package io.github.dfa1.vortex.reader.decode; + +import 
io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PTypeIO; +import io.github.dfa1.vortex.encoding.TestSegments; +import io.github.dfa1.vortex.proto.DeltaMetadata; +import io.github.dfa1.vortex.reader.ReadRegistry; +import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.LongArray; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import static org.assertj.core.api.Assertions.assertThat; + +class DeltaEncodingDecoderTest { + + private static final DeltaEncodingDecoder SUT = new DeltaEncodingDecoder(); + private static final ReadRegistry REGISTRY = TestRegistry.ofDecoders(SUT, new PrimitiveEncodingDecoder()); + + private static final int FL_CHUNK_SIZE = 1024; + + @Test + void encodingId_isFastlanesDelta() { + // Given / When / Then + assertThat(SUT.encodingId()).isEqualTo(EncodingId.FASTLANES_DELTA); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64", "U8", "U16", "U32", "U64"}) + void decode_nullMetadata_returnsEmptyArray(PType ptype) { + // Given no metadata — the decoder defaults to deltas_len=0 and short-circuits + // to an empty array of the right ptype (a path the encoder never emits, since it + // always writes metadata) + ArrayNode node = ArrayNode.of(EncodingId.FASTLANES_DELTA, null, new ArrayNode[0], new int[0]); + DecodeContext ctx = new DecodeContext(node, new DType.Primitive(ptype, false), 0, + new MemorySegment[0], REGISTRY, Arena.ofAuto()); + + // When + Array result = SUT.decode(ctx); + + // Then + assertThat(result.length()).isZero(); + } + + @Test + void decode_constantChildren_broadcastsAcrossChunk() { + // Given a single delta chunk (1024 rows) whose bases 
and deltas children each hold + // a single element, as a ConstantEncoding child would. readLongs must broadcast the + // lone value across the whole chunk (capacity < count). Zero bases + zero deltas + // means every decoded row is zero. + PType ptype = PType.I64; + long deltasLen = FL_CHUNK_SIZE; + ByteBuffer meta = ByteBuffer.wrap(new DeltaMetadata(deltasLen, 0).encode()); + + ArrayNode bases = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode deltas = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode node = ArrayNode.of(EncodingId.FASTLANES_DELTA, meta, new ArrayNode[]{bases, deltas}, new int[0]); + + // one element each → broadcast + MemorySegment[] segs = {TestSegments.leLongs(0L), TestSegments.leLongs(0L)}; + DecodeContext ctx = new DecodeContext(node, new DType.Primitive(ptype, false), 4, segs, REGISTRY, Arena.ofAuto()); + + // When + LongArray result = (LongArray) SUT.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(4); + for (int i = 0; i < 4; i++) { + assertThat(result.getLong(i)).as("index %d", i).isZero(); + } + } + + @Test + void decode_constantBases_nonZeroOffsetAndBase() { + // Given a constant base of 5 broadcast across all 16 lanes with zero deltas: + // every row decodes to the base value 5. NOTE(review): the second DeltaMetadata argument is 0 here, so this presumably reads from the chunk start rather than an offset — confirm intent. 
+ PType ptype = PType.I64; + ByteBuffer meta = ByteBuffer.wrap(new DeltaMetadata(FL_CHUNK_SIZE, 0).encode()); + + ArrayNode bases = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode deltas = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode node = ArrayNode.of(EncodingId.FASTLANES_DELTA, meta, new ArrayNode[]{bases, deltas}, new int[0]); + + MemorySegment[] segs = {TestSegments.leLongs(5L), TestSegments.leLongs(0L)}; + DecodeContext ctx = new DecodeContext(node, new DType.Primitive(ptype, false), 3, segs, REGISTRY, Arena.ofAuto()); + + // When + LongArray result = (LongArray) SUT.decode(ctx); + + // Then prefix-sum of zero deltas over base 5 stays 5 on lane 0 + assertThat(result.getLong(0)).isEqualTo(5L); + // sanity: materialized bytes are little-endian + MemorySegment seg = result.materialize(Arena.ofAuto()); + assertThat(seg.get(PTypeIO.LE_LONG, 0)).isEqualTo(5L); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinEncodingDecoderTest.java new file mode 100644 index 00000000..3b72e57b --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinEncodingDecoderTest.java @@ -0,0 +1,72 @@ +package io.github.dfa1.vortex.reader.decode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.TestSegments; +import io.github.dfa1.vortex.proto.VarBinMetadata; +import io.github.dfa1.vortex.reader.ReadRegistry; +import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.VarBinArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +import static org.assertj.core.api.Assertions.assertThat; + +class 
VarBinEncodingDecoderTest { + + private static final VarBinEncodingDecoder SUT = new VarBinEncodingDecoder(); + private static final ReadRegistry REGISTRY = TestRegistry.ofDecoders(SUT, new PrimitiveEncodingDecoder()); + + private static ByteBuffer i32OffsetsMeta() { + return ByteBuffer.wrap(new VarBinMetadata(io.github.dfa1.vortex.proto.PType.I32).encode()); + } + + private static DecodeContext ctx(ByteBuffer meta, MemorySegment bytes, MemorySegment offsets, long n) { + // children[0] = offsets (primitive, segment index 1); bufferIndices[0] -> bytes (index 0) + ArrayNode offsetsNode = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{1}); + ArrayNode varbinNode = ArrayNode.of(EncodingId.VORTEX_VARBIN, meta, new ArrayNode[]{offsetsNode}, new int[]{0}); + return new DecodeContext(varbinNode, new DType.Utf8(false), n, + new MemorySegment[]{bytes, offsets}, REGISTRY, Arena.ofAuto()); + } + + @Test + void decode_i32Offsets_happyPath() { + // Given "a","b","c" with I32 offsets (the encoder defaults to I64, so this + // exercises the I32 offsets-ptype branch directly) + MemorySegment data = MemorySegment.ofArray("abc".getBytes(StandardCharsets.UTF_8)); + MemorySegment offsets = TestSegments.leInts(0, 1, 2, 3); + + // When + Array result = SUT.decode(ctx(i32OffsetsMeta(), data, offsets, 3)); + + // Then + VarBinArray arr = (VarBinArray) result; + assertThat(arr.length()).isEqualTo(3); + assertThat(arr.getBytes(0)).containsExactly('a'); + assertThat(arr.getBytes(1)).containsExactly('b'); + assertThat(arr.getBytes(2)).containsExactly('c'); + } + + @Test + void decode_broadcastOffsets_singleOffsetExpandsToAllRows() { + // Given an offsets child holding a single value (as ConstantEncoding emits): + // capacity 1 < n+1, so the decoder must broadcast-copy it. A constant offset + // means every row spans an empty slice. 
+ MemorySegment data = Arena.ofAuto().allocate(1); + MemorySegment offsets = TestSegments.leInts(0); // one element only + + // When + Array result = SUT.decode(ctx(i32OffsetsMeta(), data, offsets, 3)); + + // Then + VarBinArray arr = (VarBinArray) result; + assertThat(arr.length()).isEqualTo(3); + for (int i = 0; i < 3; i++) { + assertThat(arr.getBytes(i)).as("index %d", i).isEmpty(); + } + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinViewEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinViewEncodingDecoderTest.java new file mode 100644 index 00000000..941ad931 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VarBinViewEncodingDecoderTest.java @@ -0,0 +1,91 @@ +package io.github.dfa1.vortex.reader.decode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PTypeIO; +import io.github.dfa1.vortex.reader.ReadRegistry; +import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.VarBinArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.charset.StandardCharsets; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class VarBinViewEncodingDecoderTest { + + private static final VarBinViewEncodingDecoder SUT = new VarBinViewEncodingDecoder(); + + @Test + void encodingId_isVortexVarBinView() { + // Given / When / Then + assertThat(SUT.encodingId()).isEqualTo(EncodingId.VORTEX_VARBINVIEW); + } + + @Test + void accepts_utf8AndBinary_true_otherFalse() { + // Given / When / Then + assertThat(SUT.accepts(new DType.Utf8(false))).isTrue(); + assertThat(SUT.accepts(new DType.Binary(false))).isTrue(); + assertThat(SUT.accepts(new DType.Primitive(io.github.dfa1.vortex.core.PType.I32, false))).isFalse(); + } + 
+ @Test + void decode_binaryDtype_inlineViews() { + // Given two short (inline) values under a Binary dtype — exercises the + // Binary branch the UTF8-only encoder tests never reach + Arena arena = Arena.ofAuto(); + byte[] a = "hi".getBytes(StandardCharsets.UTF_8); + byte[] b = "bye".getBytes(StandardCharsets.UTF_8); + MemorySegment views = arena.allocate(2 * 16); + writeInlineView(views, 0, a); + writeInlineView(views, 1, b); + + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_VARBINVIEW, null, new ArrayNode[0], new int[]{0}); + DecodeContext ctx = new DecodeContext(node, new DType.Binary(false), 2, + new MemorySegment[]{views}, ReadRegistry.empty(), arena); + + // When + Array result = SUT.decode(ctx); + + // Then + VarBinArray arr = (VarBinArray) result; + assertThat(arr.length()).isEqualTo(2); + assertThat(arr.getBytes(0)).containsExactly(a); + assertThat(arr.getBytes(1)).containsExactly(b); + } + + @Test + void decode_wrongDtype_throws() { + // Given a primitive dtype + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_VARBINVIEW, null, new ArrayNode[0], new int[]{0}); + DecodeContext ctx = new DecodeContext(node, new DType.Primitive(io.github.dfa1.vortex.core.PType.I32, false), + 0, new MemorySegment[]{Arena.ofAuto().allocate(16)}, ReadRegistry.empty(), Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(ctx)) + .hasMessageContaining("expected Utf8/Binary dtype"); + } + + @Test + void decode_noBuffers_throws() { + // Given a node with zero buffer indices + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_VARBINVIEW, null, new ArrayNode[0], new int[0]); + DecodeContext ctx = new DecodeContext(node, new DType.Utf8(false), 0, + new MemorySegment[0], ReadRegistry.empty(), Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(ctx)) + .hasMessageContaining("at least 1 buffer"); + } + + /// Writes a ≤12-byte inline view: length prefix then the bytes packed in-place. 
+ private static void writeInlineView(MemorySegment views, int row, byte[] bytes) { + long off = (long) row * 16; + views.set(PTypeIO.LE_INT, off, bytes.length); + MemorySegment.copy(MemorySegment.ofArray(bytes), 0, views, off + 4, bytes.length); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VariantEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VariantEncodingDecoderTest.java index bf029387..841d1d19 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VariantEncodingDecoderTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/VariantEncodingDecoderTest.java @@ -12,11 +12,13 @@ import io.github.dfa1.vortex.proto.Primitive; import io.github.dfa1.vortex.proto.VariantMetadata; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; +import java.util.List; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -145,4 +147,166 @@ void decode_wrongChildCount_throws() { assertThatThrownBy(() -> SUT.decode(ctx)) .hasMessageContaining("expected 1 or 2 children"); } + + /// Exercises every branch of [VariantEncodingDecoder#dtypeFromProto] — the + /// proto-to-core DType translation that backs shredded-variant decoding. 
+ @Nested + class DtypeFromProto { + + private static io.github.dfa1.vortex.proto.DType prim(io.github.dfa1.vortex.proto.PType pt, boolean nullable) { + return io.github.dfa1.vortex.proto.DType.ofPrimitive(new Primitive(pt, nullable)); + } + + @Test + void nullType() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofNull(new io.github.dfa1.vortex.proto.Null())); + + // Then null is always nullable + assertThat(result).isEqualTo(new DType.Null(true)); + } + + @Test + void bool() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofBool(new io.github.dfa1.vortex.proto.Bool(true))); + + // Then + assertThat(result).isEqualTo(new DType.Bool(true)); + } + + @Test + void primitive() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto(prim(io.github.dfa1.vortex.proto.PType.I64, false)); + + // Then + assertThat(result).isEqualTo(new DType.Primitive(PType.I64, false)); + } + + @Test + void decimal() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofDecimal(new io.github.dfa1.vortex.proto.Decimal(10, 2, false))); + + // Then precision/scale narrow to byte + assertThat(result).isEqualTo(new DType.Decimal((byte) 10, (byte) 2, false)); + } + + @Test + void utf8() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofUtf8(new io.github.dfa1.vortex.proto.Utf8(true))); + + // Then + assertThat(result).isEqualTo(new DType.Utf8(true)); + } + + @Test + void binary() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofBinary(new io.github.dfa1.vortex.proto.Binary(false))); + + // Then + assertThat(result).isEqualTo(new DType.Binary(false)); + } + + @Test + void struct() { + // Given a two-field struct with mixed child types + var proto = 
io.github.dfa1.vortex.proto.DType.ofStruct(new io.github.dfa1.vortex.proto.Struct( + List.of("a", "b"), + List.of(prim(io.github.dfa1.vortex.proto.PType.I32, false), + io.github.dfa1.vortex.proto.DType.ofUtf8(new io.github.dfa1.vortex.proto.Utf8(true))), + false)); + + // When children are translated recursively + DType result = VariantEncodingDecoder.dtypeFromProto(proto); + + // Then + assertThat(result).isEqualTo(new DType.Struct( + List.of("a", "b"), + List.of(new DType.Primitive(PType.I32, false), new DType.Utf8(true)), + false)); + } + + @Test + void list() { + // Given / When element type is translated recursively + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofList(new io.github.dfa1.vortex.proto.List( + prim(io.github.dfa1.vortex.proto.PType.I32, false), true))); + + // Then + assertThat(result).isEqualTo(new DType.List(new DType.Primitive(PType.I32, false), true)); + } + + @Test + void fixedSizeList() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofFixedSizeList(new io.github.dfa1.vortex.proto.FixedSizeList( + prim(io.github.dfa1.vortex.proto.PType.F64, false), 4, false))); + + // Then size is carried through + assertThat(result).isEqualTo( + new DType.FixedSizeList(new DType.Primitive(PType.F64, false), 4, false)); + } + + @Test + void extension_withMetadata() { + // Given an extension with non-null metadata bytes + var proto = io.github.dfa1.vortex.proto.DType.ofExtension(new io.github.dfa1.vortex.proto.Extension( + "ip.address", prim(io.github.dfa1.vortex.proto.PType.I32, false), new byte[]{1, 2, 3})); + + // When + DType result = VariantEncodingDecoder.dtypeFromProto(proto); + + // Then id, storage dtype, and metadata bytes are preserved + assertThat(result).isInstanceOf(DType.Extension.class); + DType.Extension ext = (DType.Extension) result; + assertThat(ext.extensionId()).isEqualTo("ip.address"); + 
assertThat(ext.storageDType()).isEqualTo(new DType.Primitive(PType.I32, false)); + assertThat(ext.metadata().remaining()).isEqualTo(3); + } + + @Test + void extension_nullMetadata_becomesEmptyBuffer() { + // Given null metadata — must not NPE, maps to an empty read-only buffer + var proto = io.github.dfa1.vortex.proto.DType.ofExtension(new io.github.dfa1.vortex.proto.Extension( + "uuid", prim(io.github.dfa1.vortex.proto.PType.I64, false), null)); + + // When + DType.Extension result = (DType.Extension) VariantEncodingDecoder.dtypeFromProto(proto); + + // Then + assertThat(result.metadata().remaining()).isZero(); + } + + @Test + void variant() { + // Given / When + DType result = VariantEncodingDecoder.dtypeFromProto( + io.github.dfa1.vortex.proto.DType.ofVariant(new io.github.dfa1.vortex.proto.Variant(false))); + + // Then + assertThat(result).isEqualTo(new DType.Variant(false)); + } + + @Test + void noFieldSet_throws() { + // Given a proto DType with no oneof arm populated + var empty = new io.github.dfa1.vortex.proto.DType( + null, null, null, null, null, null, null, null, null, null, null, null); + + // When / Then + assertThatThrownBy(() -> VariantEncodingDecoder.dtypeFromProto(empty)) + .hasMessageContaining("unsupported proto DType"); + } + } } diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/decode/ZigZagEncodingDecoderTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/ZigZagEncodingDecoderTest.java new file mode 100644 index 00000000..78730eab --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/decode/ZigZagEncodingDecoderTest.java @@ -0,0 +1,236 @@ +package io.github.dfa1.vortex.reader.decode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.TestSegments; +import io.github.dfa1.vortex.reader.ReadRegistry; +import 
io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.ByteArray; +import io.github.dfa1.vortex.reader.array.IntArray; +import io.github.dfa1.vortex.reader.array.LongArray; +import io.github.dfa1.vortex.reader.array.ShortArray; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class ZigZagEncodingDecoderTest { + + private static final ZigZagEncodingDecoder SUT = new ZigZagEncodingDecoder(); + private static final ReadRegistry REGISTRY = TestRegistry.ofDecoders(SUT, new PrimitiveEncodingDecoder()); + + // --- zigzag encode helpers (mirror of the decoder's (u >>> 1) ^ -(u & 1)) --- + + private static MemorySegment encodedBytes(byte... signed) { + MemorySegment seg = Arena.ofAuto().allocate(signed.length); + for (int i = 0; i < signed.length; i++) { + seg.set(ValueLayout.JAVA_BYTE, i, (byte) ((signed[i] << 1) ^ (signed[i] >> 7))); + } + return seg; + } + + private static MemorySegment encodedShorts(short... signed) { + short[] u = new short[signed.length]; + for (int i = 0; i < signed.length; i++) { + u[i] = (short) ((signed[i] << 1) ^ (signed[i] >> 15)); + } + return TestSegments.leShorts(u); + } + + private static MemorySegment encodedInts(int... signed) { + int[] u = new int[signed.length]; + for (int i = 0; i < signed.length; i++) { + u[i] = (signed[i] << 1) ^ (signed[i] >> 31); + } + return TestSegments.leInts(u); + } + + private static MemorySegment encodedLongs(long... 
signed) { + long[] u = new long[signed.length]; + for (int i = 0; i < signed.length; i++) { + u[i] = (signed[i] << 1) ^ (signed[i] >> 63); + } + return TestSegments.leLongs(u); + } + + private static Array decode(PType ptype, long n, MemorySegment encoded) { + DType dtype = new DType.Primitive(ptype, false); + ArrayNode child = ArrayNode.of(EncodingId.VORTEX_PRIMITIVE, null, new ArrayNode[0], new int[]{0}); + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZIGZAG, null, new ArrayNode[]{child}, new int[]{}); + DecodeContext ctx = new DecodeContext(node, dtype, n, new MemorySegment[]{encoded}, REGISTRY, Arena.ofAuto()); + return SUT.decode(ctx); + } + + @Test + void encodingId_isZigzag() { + // Given / When / Then + assertThat(SUT.encodingId()).isEqualTo(EncodingId.VORTEX_ZIGZAG); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64"}) + void accepts_signedIntegers(PType ptype) { + // Given / When / Then + assertThat(SUT.accepts(new DType.Primitive(ptype, false))).isTrue(); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"U8", "U16", "U32", "U64", "F16", "F32", "F64"}) + void accepts_rejectsNonSigned(PType ptype) { + // Given / When / Then + assertThat(SUT.accepts(new DType.Primitive(ptype, false))).isFalse(); + } + + @Test + void accepts_rejectsNonPrimitive() { + // Given / When / Then + assertThat(SUT.accepts(new DType.Bool(false))).isFalse(); + } + + @Test + void decode_i8_roundTrip() { + // Given + byte[] signed = {0, -1, 1, Byte.MIN_VALUE, Byte.MAX_VALUE, -42}; + + // When + Array result = decode(PType.I8, signed.length, encodedBytes(signed)); + + // Then + assertThat(result).isInstanceOf(ByteArray.class); + ByteArray bytes = (ByteArray) result; + for (int i = 0; i < signed.length; i++) { + assertThat(bytes.getByte(i)).as("index %d", i).isEqualTo(signed[i]); + } + } + + @Test + void decode_i16_roundTrip() { + // Given + short[] signed = {0, -1, 1, Short.MIN_VALUE, Short.MAX_VALUE, -1000}; + + // 
When + Array result = decode(PType.I16, signed.length, encodedShorts(signed)); + + // Then + assertThat(result).isInstanceOf(ShortArray.class); + ShortArray shorts = (ShortArray) result; + for (int i = 0; i < signed.length; i++) { + assertThat(shorts.getShort(i)).as("index %d", i).isEqualTo(signed[i]); + } + } + + @Test + void decode_i32_roundTrip() { + // Given + int[] signed = {0, -1, 1, Integer.MIN_VALUE, Integer.MAX_VALUE, -123456}; + + // When + Array result = decode(PType.I32, signed.length, encodedInts(signed)); + + // Then + assertThat(result).isInstanceOf(IntArray.class); + IntArray ints = (IntArray) result; + for (int i = 0; i < signed.length; i++) { + assertThat(ints.getInt(i)).as("index %d", i).isEqualTo(signed[i]); + } + } + + @Test + void decode_i64_roundTrip() { + // Given + long[] signed = {0, -1, 1, Long.MIN_VALUE, Long.MAX_VALUE, -9_000_000_000L}; + + // When + Array result = decode(PType.I64, signed.length, encodedLongs(signed)); + + // Then + assertThat(result).isInstanceOf(LongArray.class); + LongArray longs = (LongArray) result; + for (int i = 0; i < signed.length; i++) { + assertThat(longs.getLong(i)).as("index %d", i).isEqualTo(signed[i]); + } + } + + // --- broadcast path: child holds a single encoded value, rowCount > 1 --- + + @Test + void decode_i8_broadcastsSingleValue() { + // Given a one-element child segment but four logical rows + long n = 4; + + // When + Array result = decode(PType.I8, n, encodedBytes((byte) -42)); + + // Then every row decodes to the lone value (zip-bomb-safe constant) + ByteArray bytes = (ByteArray) result; + for (long i = 0; i < n; i++) { + assertThat(bytes.getByte(i)).as("index %d", i).isEqualTo((byte) -42); + } + } + + @Test + void decode_i16_broadcastsSingleValue() { + // Given + long n = 3; + + // When + Array result = decode(PType.I16, n, encodedShorts((short) -1000)); + + // Then + ShortArray shorts = (ShortArray) result; + for (long i = 0; i < n; i++) { + assertThat(shorts.getShort(i)).as("index %d", 
i).isEqualTo((short) -1000); + } + } + + @Test + void decode_i32_broadcastsSingleValue() { + // Given + long n = 3; + + // When + Array result = decode(PType.I32, n, encodedInts(-123456)); + + // Then + IntArray ints = (IntArray) result; + for (long i = 0; i < n; i++) { + assertThat(ints.getInt(i)).as("index %d", i).isEqualTo(-123456); + } + } + + @Test + void decode_i64_broadcastsSingleValue() { + // Given + long n = 3; + + // When + Array result = decode(PType.I64, n, encodedLongs(-9_000_000_000L)); + + // Then + LongArray longs = (LongArray) result; + for (long i = 0; i < n; i++) { + assertThat(longs.getLong(i)).as("index %d", i).isEqualTo(-9_000_000_000L); + } + } + + @Test + void decode_nonPrimitiveDtype_throws() { + // Given a non-primitive logical type on the context + ArrayNode node = ArrayNode.of(EncodingId.VORTEX_ZIGZAG, null, new ArrayNode[0], new int[]{}); + DecodeContext ctx = new DecodeContext(node, new DType.Bool(false), 1, + new MemorySegment[0], REGISTRY, Arena.ofAuto()); + + // When / Then + assertThatThrownBy(() -> SUT.decode(ctx)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("expected primitive dtype"); + } +} diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/SequenceEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/SequenceEncodingEncoderTest.java index 9eba0181..24efe7e9 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/SequenceEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/SequenceEncodingEncoderTest.java @@ -3,11 +3,13 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.ByteArray; import io.github.dfa1.vortex.reader.array.DoubleArray; import io.github.dfa1.vortex.reader.array.Float16Array; import io.github.dfa1.vortex.reader.array.FloatArray; import 
io.github.dfa1.vortex.reader.array.IntArray; import io.github.dfa1.vortex.reader.array.LongArray; +import io.github.dfa1.vortex.reader.array.ShortArray; import io.github.dfa1.vortex.reader.decode.ArrayNode; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -51,6 +53,180 @@ void encodingId_isVortexSequence() { assertThat(ENCODER.encodingId()).isEqualTo(EncodingId.VORTEX_SEQUENCE); } + @Test + void accepts_primitive_true() { + // Given / When / Then + assertThat(ENCODER.accepts(DTypes.I32)).isTrue(); + } + + @Test + void accepts_nonPrimitive_false() { + // Given / When / Then + assertThat(ENCODER.accepts(new DType.Utf8(false))).isFalse(); + } + + @Test + void encode_i8_roundTrips() { + // Given + byte[] data = {-3, -1, 1, 3, 5}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.I8, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.I8, data.length); + ByteArray result = (ByteArray) DECODER.decode(ctx); + + // Then + for (int i = 0; i < data.length; i++) { + assertThat(result.getByte(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_i16_roundTrips() { + // Given + short[] data = {100, 90, 80, 70}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.I16, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.I16, data.length); + ShortArray result = (ShortArray) DECODER.decode(ctx); + + // Then + for (int i = 0; i < data.length; i++) { + assertThat(result.getShort(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_i32_roundTrips() { + // Given + int[] data = {0, 5, 10, 15}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.I32, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.I32, data.length); + IntArray result = (IntArray) DECODER.decode(ctx); + + // Then + for (int i = 0; i < data.length; 
i++) { + assertThat(result.getInt(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_u64_roundTrips_andMetadataIsUnsigned() throws Exception { + // Given unsigned dtype routes base/multiplier through ofUint64Value, not int64 + long[] data = {1000L, 1010L, 1020L}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.U64, data, EncodeTestHelper.testCtx()); + MemorySegment metaSeg = MemorySegment.ofBuffer(resultEncoded.rootNode().metadata().duplicate()); + SequenceMetadata meta = SequenceMetadata.decode(metaSeg, 0, metaSeg.byteSize()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.U64, data.length); + LongArray result = (LongArray) DECODER.decode(ctx); + + // Then metadata uses the unsigned oneof field + assertThat(meta.base().uint64_value()).isEqualTo(1000L); + assertThat(meta.multiplier().uint64_value()).isEqualTo(10L); + for (int i = 0; i < data.length; i++) { + assertThat(result.getLong(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_u16_roundTrips() { + // Given + short[] data = {7, 14, 21}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.U16, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.U16, data.length); + ShortArray result = (ShortArray) DECODER.decode(ctx); + + // Then + for (int i = 0; i < data.length; i++) { + assertThat(result.getShort(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_f32_roundTrips() { + // Given + float[] data = {0.0f, 0.25f, 0.5f, 0.75f}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.F32, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.F32, data.length); + FloatArray result = (FloatArray) DECODER.decode(ctx); + + // Then + for (int i = 0; i < data.length; i++) { + assertThat(result.getFloat(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + @Test + void encode_emptyArray_roundTripsToZeroLength() { + 
// Given the n==0 branch: base and multiplier default to 0 + long[] data = {}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.I64, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.I64, 0); + Array result = DECODER.decode(ctx); + + // Then + assertThat(result.length()).isZero(); + } + + @Test + void encode_singleElement_multiplierIsZero() { + // Given the n==1 branch: multiplier stays 0, base is the lone value + long[] data = {42L}; + + // When + EncodeResult resultEncoded = ENCODER.encode(DTypes.I64, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = encodeResultToCtx(resultEncoded, DTypes.I64, 1); + LongArray result = (LongArray) DECODER.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(1); + assertThat(result.getLong(0)).isEqualTo(42L); + } + + @Test + void encode_nonArithmeticF32_throwsVortexException() { + // Given + float[] data = {1.0f, 2.0f, 4.0f}; + + // When / Then + assertThatThrownBy(() -> ENCODER.encode(DTypes.F32, data, EncodeTestHelper.testCtx())) + .isInstanceOf(VortexException.class); + } + + @Test + void encode_nonArithmeticF64_throwsVortexException() { + // Given + double[] data = {1.0, 2.0, 4.0}; + + // When / Then + assertThatThrownBy(() -> ENCODER.encode(DTypes.F64, data, EncodeTestHelper.testCtx())) + .isInstanceOf(VortexException.class); + } + + @Test + void encode_nonArithmeticF16_throwsVortexException() { + // Given + short[] data = { + Float.floatToFloat16(1.0f), + Float.floatToFloat16(2.0f), + Float.floatToFloat16(4.0f)}; + + // When / Then + assertThatThrownBy(() -> ENCODER.encode(DTypes.F16, data, EncodeTestHelper.testCtx())) + .isInstanceOf(VortexException.class); + } + @Test void encode_i64_roundTrips() { // Given diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/TimeExtensionEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/TimeExtensionEncoderTest.java new file mode 100644 index 00000000..768755d7 --- 
/dev/null +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/TimeExtensionEncoderTest.java @@ -0,0 +1,167 @@ +package io.github.dfa1.vortex.writer.encode; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.TimeUnit; +import io.github.dfa1.vortex.extension.ExtensionId; +import io.github.dfa1.vortex.extension.TimeDtype; +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; +import java.time.LocalTime; +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class TimeExtensionEncoderTest { + + private static final TimeExtensionEncoder SUT = TimeExtensionEncoder.INSTANCE; + private static final LocalTime T = LocalTime.of(1, 2, 3, 456_000_000); // 01:02:03.456 + + @Test + void extensionId_isVortexTime() { + // Given / When / Then + assertThat(SUT.extensionId()).isEqualTo(ExtensionId.VORTEX_TIME); + } + + @Test + void dtype_default_isMillisecondsOverI32() { + // Given / When + DType.Extension result = SUT.dtype(false); + + // Then + assertThat(TimeDtype.readUnit(result)).isEqualTo(TimeUnit.Milliseconds); + assertThat(result.storageDType()).isEqualTo(new DType.Primitive(PType.I32, false)); + } + + @Test + void dtype_seconds_usesI32() { + // Given / When + DType.Extension result = SUT.dtype(TimeUnit.Seconds, true); + + // Then + assertThat(TimeDtype.readUnit(result)).isEqualTo(TimeUnit.Seconds); + assertThat(result.storageDType()).isEqualTo(new DType.Primitive(PType.I32, true)); + } + + @Test + void dtype_nanoseconds_usesI64() { + // Given / When + DType.Extension result = SUT.dtype(TimeUnit.Nanoseconds, false); + + // Then + assertThat(TimeDtype.readUnit(result)).isEqualTo(TimeUnit.Nanoseconds); + assertThat(result.storageDType()).isEqualTo(new DType.Primitive(PType.I64, false)); + } + + @Test + void 
encodeAll_seconds_returnsIntArray() { + // Given seconds resolution truncates sub-second precision + DType.Extension dtype = SUT.dtype(TimeUnit.Seconds, false); + + // When + Object result = SUT.encodeAll(dtype, List.of(T)); + + // Then + assertThat(result).isInstanceOf(int[].class); + assertThat((int[]) result).containsExactly(1 * 3600 + 2 * 60 + 3); + } + + @Test + void encodeAll_milliseconds_returnsIntArray() { + // Given + DType.Extension dtype = SUT.dtype(TimeUnit.Milliseconds, false); + + // When + Object result = SUT.encodeAll(dtype, List.of(T)); + + // Then + long expectedMs = (1 * 3600 + 2 * 60 + 3) * 1000L + 456; + assertThat(result).isInstanceOf(int[].class); + assertThat((int[]) result).containsExactly((int) expectedMs); + } + + @Test + void encodeAll_microseconds_returnsLongArray() { + // Given + DType.Extension dtype = SUT.dtype(TimeUnit.Microseconds, false); + + // When + Object result = SUT.encodeAll(dtype, List.of(T)); + + // Then + assertThat(result).isInstanceOf(long[].class); + assertThat((long[]) result).containsExactly(T.toNanoOfDay() / 1_000L); + } + + @Test + void encodeAll_nanoseconds_returnsLongArray() { + // Given + DType.Extension dtype = SUT.dtype(TimeUnit.Nanoseconds, false); + + // When + Object result = SUT.encodeAll(dtype, List.of(T)); + + // Then + assertThat(result).isInstanceOf(long[].class); + assertThat((long[]) result).containsExactly(T.toNanoOfDay()); + } + + @Test + void encodeAll_nullWithNullableInt_returnsNullableDataWithZeroPlaceholder() { + // Given a null in a nullable column + DType.Extension dtype = SUT.dtype(TimeUnit.Milliseconds, true); + + // When + Object result = SUT.encodeAll(dtype, Arrays.asList(T, null)); + + // Then storage carries a zero placeholder at the null position; validity marks it + assertThat(result).isInstanceOf(NullableData.class); + NullableData nd = (NullableData) result; + int[] values = (int[]) nd.values(); + assertThat(values[1]).isZero(); + assertThat(nd.validity()).containsExactly(true, 
false); + } + + @Test + void encodeAll_nullWithNullableLong_returnsNullableData() { + // Given a null in a nullable μs column (long storage) + DType.Extension dtype = SUT.dtype(TimeUnit.Microseconds, true); + + // When + Object result = SUT.encodeAll(dtype, Arrays.asList(null, T)); + + // Then + NullableData nd = (NullableData) result; + assertThat(nd.values()).isInstanceOf(long[].class); + assertThat(nd.validity()).containsExactly(false, true); + } + + @Test + void encodeAll_nullInNonNullableColumn_throws() { + // Given a null in a non-nullable column + DType.Extension dtype = SUT.dtype(TimeUnit.Milliseconds, false); + + // When / Then + assertThatThrownBy(() -> SUT.encodeAll(dtype, Arrays.asList(T, null))) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-nullable"); + } + + @Test + void encodeAll_daysUnit_throws() { + // Given a hand-built Days-tagged dtype (TimeDtype.of rejects Days, so build directly) + ByteBuffer meta = ByteBuffer.allocate(1); + meta.put(0, (byte) TimeUnit.Days.ordinal()); + DType.Extension dtype = new DType.Extension( + ExtensionId.VORTEX_TIME.id(), new DType.Primitive(PType.I32, false), meta, false); + + // When / Then + assertThatThrownBy(() -> SUT.encodeAll(dtype, List.of(T))) + .isInstanceOf(VortexException.class) + .hasMessageContaining("Days"); + } +} diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/ZigZagEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/ZigZagEncodingEncoderTest.java index 13dbc47d..d797f1df 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/ZigZagEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/ZigZagEncodingEncoderTest.java @@ -1,8 +1,11 @@ package io.github.dfa1.vortex.writer.encode; +import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.ByteArray; import io.github.dfa1.vortex.reader.array.IntArray; import 
io.github.dfa1.vortex.reader.array.LongArray; +import io.github.dfa1.vortex.reader.array.ShortArray; import io.github.dfa1.vortex.reader.decode.ArrayNode; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -90,9 +93,86 @@ void decode_empty_returnsEmptyArray() { } } + @Test + void encodingId_isVortexZigzag() { + // Given / When / Then + assertThat(ENCODER.encodingId()).isEqualTo(EncodingId.VORTEX_ZIGZAG); + } + + @Test + void accepts_signedIntegers_true() { + // Given / When / Then + assertThat(ENCODER.accepts(DTypes.I8)).isTrue(); + assertThat(ENCODER.accepts(DTypes.I16)).isTrue(); + assertThat(ENCODER.accepts(DTypes.I32)).isTrue(); + assertThat(ENCODER.accepts(DTypes.I64)).isTrue(); + } + + @Test + void accepts_unsignedOrNonPrimitive_false() { + // Given / When / Then + assertThat(ENCODER.accepts(DTypes.U32)).isFalse(); + assertThat(ENCODER.accepts(DTypes.F64)).isFalse(); + assertThat(ENCODER.accepts(new DType.Utf8(false))).isFalse(); + } + @Nested class Encode { + static Stream i8RoundtripArrays() { + return Stream.of( + new byte[]{}, + new byte[]{0}, + new byte[]{-1, 1, -2, 2}, + new byte[]{Byte.MIN_VALUE, Byte.MAX_VALUE, 0} + ); + } + + static Stream i16RoundtripArrays() { + return Stream.of( + new short[]{}, + new short[]{0}, + new short[]{Short.MIN_VALUE, Short.MAX_VALUE, 0}, + new short[]{-100, 100, -1000, 1000} + ); + } + + @ParameterizedTest + @MethodSource("i8RoundtripArrays") + void encodeDecode_i8_isLossless(byte[] data) { + // Given + EncodeResult resultEncoded = ENCODER.encode(DTypes.I8, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = DecodeTestHelper.toDecodeContext(resultEncoded, data.length, DTypes.I8, REGISTRY); + + // When + Array result = DECODER.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(data.length); + ByteArray arr = (ByteArray) result; + for (int i = 0; i < data.length; i++) { + assertThat(arr.getByte(i)).as("index %d", i).isEqualTo(data[i]); + } + } + + 
@ParameterizedTest + @MethodSource("i16RoundtripArrays") + void encodeDecode_i16_isLossless(short[] data) { + // Given + EncodeResult resultEncoded = ENCODER.encode(DTypes.I16, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = DecodeTestHelper.toDecodeContext(resultEncoded, data.length, DTypes.I16, REGISTRY); + + // When + Array result = DECODER.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(data.length); + ShortArray arr = (ShortArray) result; + for (int i = 0; i < data.length; i++) { + assertThat(arr.getShort(i)).as("index %d", i).isEqualTo(data[i]); + } + } + static Stream i32RoundtripArrays() { return Stream.of( new int[]{},