diff --git a/writer/pom.xml b/writer/pom.xml index 15449edf..5ec74370 100644 --- a/writer/pom.xml +++ b/writer/pom.xml @@ -82,6 +82,8 @@ io.github.dfa1.vortex.writer.WriteRegistry io.github.dfa1.vortex.writer.WriteRegistry$Builder io.github.dfa1.vortex.writer.VortexWriter + io.github.dfa1.vortex.writer.encode.DeltaEncodingEncoder + io.github.dfa1.vortex.writer.encode.AlpRdEncodingEncoder diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/AlpRdEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/AlpRdEncodingEncoderTest.java index b65161eb..23faff54 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/AlpRdEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/AlpRdEncodingEncoderTest.java @@ -1,6 +1,8 @@ package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.encoding.DTypes; +import io.github.dfa1.vortex.reader.array.DoubleArray; +import io.github.dfa1.vortex.reader.array.FloatArray; import io.github.dfa1.vortex.reader.decode.DecodeContext; import io.github.dfa1.vortex.reader.ReadRegistry; @@ -10,6 +12,12 @@ import io.github.dfa1.vortex.reader.decode.BitpackedEncodingDecoder; import io.github.dfa1.vortex.reader.decode.PrimitiveEncodingDecoder; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.Random; +import java.util.stream.Stream; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.within; @@ -72,4 +80,94 @@ void encode_f64_metadata_rightBitWidth_isNonZero() throws Exception { // Then assertThat(meta.right_bit_width()).isGreaterThan(0); } + + // Property test: ALPRD is a lossless raw-bit split (dictionary left parts + bit-packed right parts, + // exceptions stored verbatim), so the round-trip must be *bit-exact* for arbitrary finite values — + // including -0.0 and exception-heavy random data where most left parts miss the 8-entry dictionary. + // Sizes past 512 (the sampling window) and past one bit-pack chunk exercise the exception + multi- + // chunk paths the 5-element happy-path tests never reach. + @ParameterizedTest(name = "f64/{0}") + @MethodSource("sizes") + void encodeDecode_randomF64_isBitExact(int n) { + // Given + double[] values = randomDoubles(n, new Random(0xA1B2C3D4L + n)); + var decoder = new AlpRdEncodingDecoder(); + ReadRegistry registry = TestRegistry.ofDecoders(decoder, new BitpackedEncodingDecoder(), new PrimitiveEncodingDecoder()); + + // When + EncodeResult encoded = new AlpRdEncodingEncoder().encode(DTypes.F64, values, EncodeTestHelper.testCtx()); + DecodeContext ctx = DecodeTestHelper.toDecodeContext(encoded, n, DTypes.F64, registry); + DoubleArray result = (DoubleArray) decoder.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(n); + for (int i = 0; i < n; i++) { + assertThat(Double.doubleToRawLongBits(result.getDouble(i))) + .as("idx %d", i).isEqualTo(Double.doubleToRawLongBits(values[i])); + } + } + + @ParameterizedTest(name = "f32/{0}") + @MethodSource("sizes") + void encodeDecode_randomF32_isBitExact(int n) { + // Given + float[] values = randomFloats(n, new Random(0xE5F60718L + n)); + var decoder = new AlpRdEncodingDecoder(); + ReadRegistry registry = TestRegistry.ofDecoders(decoder, new BitpackedEncodingDecoder(), new PrimitiveEncodingDecoder()); + + // When + EncodeResult encoded = new AlpRdEncodingEncoder().encode(DTypes.F32, values, EncodeTestHelper.testCtx()); + DecodeContext ctx = DecodeTestHelper.toDecodeContext(encoded, n, DTypes.F32, registry); + FloatArray result = (FloatArray) decoder.decode(ctx); + + // Then + assertThat(result.length()).isEqualTo(n); + for (int i = 0; i < n; i++) { + assertThat(Float.floatToRawIntBits(result.getFloat(i))) + .as("idx %d", i).isEqualTo(Float.floatToRawIntBits(values[i])); + } + } + + @Test + void accepts_floatPtypesOnly() { + // Given / When / Then — only F32/F64 are encodable; integers and non-primitives are rejected + var encoder = new AlpRdEncodingEncoder(); + var decoder = new AlpRdEncodingDecoder(); + assertThat(encoder.accepts(DTypes.F32)).isTrue(); + assertThat(encoder.accepts(DTypes.F64)).isTrue(); + assertThat(decoder.accepts(DTypes.F32)).isTrue(); + assertThat(decoder.accepts(DTypes.F64)).isTrue(); + assertThat(encoder.accepts(DTypes.I64)).isFalse(); + assertThat(encoder.accepts(DTypes.UTF8)).isFalse(); + assertThat(decoder.accepts(DTypes.I32)).isFalse(); + } + + private static Stream sizes() { + // 0 → empty path; 1/5 → sub-sample; 1024/1025/3000 → past the 512 sample window + multi-chunk. + return Stream.of(0, 1, 5, 1024, 1025, 3000).map(Arguments::of); + } + + private static double[] randomDoubles(int n, Random rng) { + double[] a = new double[n]; + for (int i = 0; i < n; i++) { + double d; + do { + d = Double.longBitsToDouble(rng.nextLong()); + } while (!Double.isFinite(d)); + a[i] = d; + } + return a; + } + + private static float[] randomFloats(int n, Random rng) { + float[] a = new float[n]; + for (int i = 0; i < n; i++) { + float f; + do { + f = Float.intBitsToFloat(rng.nextInt()); + } while (!Float.isFinite(f)); + a[i] = f; + } + return a; + } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java index 604edec4..eed4307f 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java @@ -1,5 +1,7 @@ package io.github.dfa1.vortex.writer.encode; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.reader.array.Array; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -8,15 +10,21 @@ import io.github.dfa1.vortex.reader.ReadRegistry; import io.github.dfa1.vortex.reader.decode.TestRegistry; import io.github.dfa1.vortex.proto.DeltaMetadata; +import io.github.dfa1.vortex.proto.ScalarValue; import io.github.dfa1.vortex.reader.decode.DeltaEncodingDecoder; import io.github.dfa1.vortex.reader.decode.PrimitiveEncodingDecoder; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; import java.util.stream.Stream; import static org.assertj.core.api.Assertions.assertThat; @@ -120,4 +128,142 @@ void encode_i64_metadata_deltasLen_isNonZero() throws Exception { // Then assertThat(meta.deltas_len()).isGreaterThan(0); } + + // Property test: seeded-random arrays across every accepted integer ptype and a range of sizes. + // The hand-picked cases above all stay under one FastLanes chunk (1024); the 1024/1025/3000 sizes + // here exercise the multi-chunk loop, the cross-chunk transpose, and the offset-slice tail — the + // bulk of the encode/decode logic that small arrays never reach. + @ParameterizedTest(name = "{0}") + @MethodSource("randomIntegerArrays") + void encodeDecode_randomAcrossPtypesAndSizes_isLossless(String name, DType dtype, Object data, int n) { + // Given + EncodeResult encoded = ENCODER.encode(dtype, data, EncodeTestHelper.testCtx()); + DecodeContext ctx = DecodeTestHelper.toDecodeContext(encoded, n, dtype, REGISTRY); + + // When + Array result = DECODER.decode(ctx); + + // Then — round-trip reproduces every element's raw bytes exactly + assertThat(result.length()).isEqualTo(n); + MemorySegment seg = result.materialize(Arena.ofAuto()); + PType ptype = ((DType.Primitive) dtype).ptype(); + for (int i = 0; i < n; i++) { + long off = (long) i * ptype.byteSize(); + switch (ptype) { + case I8, U8 -> assertThat(seg.get(ValueLayout.JAVA_BYTE, off)).as("idx %d", i).isEqualTo(((byte[]) data)[i]); + case I16, U16 -> assertThat(seg.get(PTypeIO.LE_SHORT, off)).as("idx %d", i).isEqualTo(((short[]) data)[i]); + case I32, U32 -> assertThat(seg.get(PTypeIO.LE_INT, off)).as("idx %d", i).isEqualTo(((int[]) data)[i]); + case I64, U64 -> assertThat(seg.get(PTypeIO.LE_LONG, off)).as("idx %d", i).isEqualTo(((long[]) data)[i]); + default -> throw new AssertionError(ptype); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"I8", "I16", "I32", "I64", "U8", "U16", "U32", "U64"}) + void accepts_everyIntegerPtype_isTrue(String ptype) { + // Given / When / Then + assertThat(ENCODER.accepts(new DType.Primitive(PType.valueOf(ptype), false))).isTrue(); + assertThat(DECODER.accepts(new DType.Primitive(PType.valueOf(ptype), false))).isTrue(); + } + + @Test + void accepts_nonIntegerOrNonPrimitive_isFalse() { + // Given / When / Then — floats and non-primitive dtypes are rejected by both sides + assertThat(ENCODER.accepts(DTypes.F64)).isFalse(); + assertThat(ENCODER.accepts(DTypes.UTF8)).isFalse(); + assertThat(DECODER.accepts(DTypes.F32)).isFalse(); + assertThat(DECODER.accepts(DTypes.BOOL)).isFalse(); + } + + @Test + void encode_signedI64_statsCarryMinAndMax() throws Exception { + // Given — unordered; min/max are interior so a broken scan (negated compare) picks a wrong value + long[] data = {30L, -10L, 50L, 20L, 40L}; + + // When + EncodeResult result = ENCODER.encode(DTypes.I64, data, EncodeTestHelper.testCtx()); + + // Then — signed stats use the int64 scalar field, min/max by signed ordering + assertThat(result.hasStats()).isTrue(); + assertThat(scalar(result.statsMin()).int64_value()).isEqualTo(-10L); + assertThat(scalar(result.statsMax()).int64_value()).isEqualTo(50L); + } + + @Test + void encode_unsignedU64_statsUseUnsignedOrderingAndField() throws Exception { + // Given — -1L is the max value under unsigned ordering but the min under signed ordering, so this + // pins both the unsigned compare (lines 57/60) and the unsigned stats field (isUnsigned/statsBytes) + long[] data = {1L, -1L, 5L}; + + // When + EncodeResult result = ENCODER.encode(DTypes.U64, data, EncodeTestHelper.testCtx()); + + // Then + assertThat(scalar(result.statsMin()).uint64_value()).isEqualTo(1L); + assertThat(scalar(result.statsMax()).uint64_value()).isEqualTo(-1L); + } + + @Test + void encode_empty_hasNoStats() { + // Given / When — the n>0 guard must suppress stats for an empty array + EncodeResult result = ENCODER.encode(DTypes.I64, new long[0], EncodeTestHelper.testCtx()); + + // Then + assertThat(result.statsMin()).isNull(); + assertThat(result.statsMax()).isNull(); + assertThat(result.hasStats()).isFalse(); + } + + private static ScalarValue scalar(byte[] bytes) throws java.io.IOException { + MemorySegment seg = MemorySegment.ofArray(bytes); + return ScalarValue.decode(seg, 0, seg.byteSize()); + } + + private static Stream randomIntegerArrays() { + Random rng = new Random(0xD317A1L); + // 0 → empty path; 1/5 → sub-chunk; 1024 → exactly one chunk; 1025/3000 → multi-chunk + tail slice. + int[] sizes = {0, 1, 5, 1024, 1025, 3000}; + DType[] dtypes = {DTypes.I8, DTypes.I16, DTypes.I32, DTypes.I64, DTypes.U8, DTypes.U16, DTypes.U32, DTypes.U64}; + List out = new ArrayList<>(); + for (DType dtype : dtypes) { + PType ptype = ((DType.Primitive) dtype).ptype(); + for (int n : sizes) { + out.add(Arguments.of(ptype + "/" + n, dtype, randomArray(ptype, n, rng), n)); + } + } + return out.stream(); + } + + private static Object randomArray(PType ptype, int n, Random rng) { + return switch (ptype) { + case I8, U8 -> { + byte[] a = new byte[n]; + rng.nextBytes(a); + yield a; + } + case I16, U16 -> { + short[] a = new short[n]; + for (int i = 0; i < n; i++) { + a[i] = (short) rng.nextInt(); + } + yield a; + } + case I32, U32 -> { + int[] a = new int[n]; + for (int i = 0; i < n; i++) { + a[i] = rng.nextInt(); + } + yield a; + } + case I64, U64 -> { + long[] a = new long[n]; + for (int i = 0; i < n; i++) { + a[i] = rng.nextLong(); + } + yield a; + } + default -> throw new AssertionError(ptype); + }; + } }