Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions writer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@
<param>io.github.dfa1.vortex.writer.WriteRegistry</param>
<param>io.github.dfa1.vortex.writer.WriteRegistry$Builder</param>
<param>io.github.dfa1.vortex.writer.VortexWriter</param>
<param>io.github.dfa1.vortex.writer.encode.DeltaEncodingEncoder</param>
<param>io.github.dfa1.vortex.writer.encode.AlpRdEncodingEncoder</param>
</targetClasses>
</configuration>
</plugin>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package io.github.dfa1.vortex.writer.encode;

import io.github.dfa1.vortex.encoding.DTypes;
import io.github.dfa1.vortex.reader.array.DoubleArray;
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.decode.DecodeContext;

import io.github.dfa1.vortex.reader.ReadRegistry;
Expand All @@ -10,6 +12,12 @@
import io.github.dfa1.vortex.reader.decode.BitpackedEncodingDecoder;
import io.github.dfa1.vortex.reader.decode.PrimitiveEncodingDecoder;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.Random;
import java.util.stream.Stream;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.within;
Expand Down Expand Up @@ -72,4 +80,94 @@ void encode_f64_metadata_rightBitWidth_isNonZero() throws Exception {
// Then
assertThat(meta.right_bit_width()).isGreaterThan(0);
}

// Property test for the f64 path. ALPRD is a lossless split of each value's raw bits (left parts go
// through a dictionary, right parts are bit-packed, and exceptions are stored verbatim), so decoding
// must return the exact bit pattern of every finite input. Uniformly random bit patterns are
// exception-heavy: most left parts miss the 8-entry dictionary. Sizes beyond 512 (the sampling
// window) and beyond a single bit-pack chunk reach the exception and multi-chunk paths that the
// 5-element happy-path tests never touch.
@ParameterizedTest(name = "f64/{0}")
@MethodSource("sizes")
void encodeDecode_randomF64_isBitExact(int n) {
    // Given — the seed depends on n, so every size gets its own reproducible data set
    Random rng = new Random(0xA1B2C3D4L + n);
    double[] expected = randomDoubles(n, rng);
    AlpRdEncodingDecoder alpRdDecoder = new AlpRdEncodingDecoder();
    ReadRegistry readRegistry = TestRegistry.ofDecoders(
            alpRdDecoder, new BitpackedEncodingDecoder(), new PrimitiveEncodingDecoder());

    // When — encode, then feed the encoded result straight back through the decoder
    AlpRdEncodingEncoder alpRdEncoder = new AlpRdEncodingEncoder();
    EncodeResult encodeResult = alpRdEncoder.encode(DTypes.F64, expected, EncodeTestHelper.testCtx());
    DecodeContext decodeContext = DecodeTestHelper.toDecodeContext(encodeResult, n, DTypes.F64, readRegistry);
    DoubleArray decoded = (DoubleArray) alpRdDecoder.decode(decodeContext);

    // Then — compare raw bits rather than numeric values so sign-of-zero differences cannot hide
    assertThat(decoded.length()).isEqualTo(n);
    for (int idx = 0; idx < n; idx++) {
        long actualBits = Double.doubleToRawLongBits(decoded.getDouble(idx));
        long expectedBits = Double.doubleToRawLongBits(expected[idx]);
        assertThat(actualBits).as("idx %d", idx).isEqualTo(expectedBits);
    }
}

// f32 counterpart of the bit-exact round-trip property test: the same sizes, but 32-bit patterns
// and a FloatArray result.
@ParameterizedTest(name = "f32/{0}")
@MethodSource("sizes")
void encodeDecode_randomF32_isBitExact(int n) {
    // Given — the seed depends on n, so every size gets its own reproducible data set
    Random rng = new Random(0xE5F60718L + n);
    float[] expected = randomFloats(n, rng);
    AlpRdEncodingDecoder alpRdDecoder = new AlpRdEncodingDecoder();
    ReadRegistry readRegistry = TestRegistry.ofDecoders(
            alpRdDecoder, new BitpackedEncodingDecoder(), new PrimitiveEncodingDecoder());

    // When — encode, then feed the encoded result straight back through the decoder
    AlpRdEncodingEncoder alpRdEncoder = new AlpRdEncodingEncoder();
    EncodeResult encodeResult = alpRdEncoder.encode(DTypes.F32, expected, EncodeTestHelper.testCtx());
    DecodeContext decodeContext = DecodeTestHelper.toDecodeContext(encodeResult, n, DTypes.F32, readRegistry);
    FloatArray decoded = (FloatArray) alpRdDecoder.decode(decodeContext);

    // Then — compare raw bits rather than numeric values so sign-of-zero differences cannot hide
    assertThat(decoded.length()).isEqualTo(n);
    for (int idx = 0; idx < n; idx++) {
        int actualBits = Float.floatToRawIntBits(decoded.getFloat(idx));
        int expectedBits = Float.floatToRawIntBits(expected[idx]);
        assertThat(actualBits).as("idx %d", idx).isEqualTo(expectedBits);
    }
}

@Test
void accepts_floatPtypesOnly() {
    // Given
    AlpRdEncodingEncoder encoder = new AlpRdEncodingEncoder();
    AlpRdEncodingDecoder decoder = new AlpRdEncodingDecoder();

    // When / Then — the encoder takes F32/F64 and turns away integer and non-primitive dtypes
    assertThat(encoder.accepts(DTypes.F32)).isTrue();
    assertThat(encoder.accepts(DTypes.F64)).isTrue();
    assertThat(encoder.accepts(DTypes.I64)).isFalse();
    assertThat(encoder.accepts(DTypes.UTF8)).isFalse();

    // ...and the decoder mirrors that: floats in, integers out
    assertThat(decoder.accepts(DTypes.F32)).isTrue();
    assertThat(decoder.accepts(DTypes.F64)).isTrue();
    assertThat(decoder.accepts(DTypes.I32)).isFalse();
}

// Array lengths fed to the parameterized round-trip tests, one Arguments per length.
private static Stream<Arguments> sizes() {
    // 0 → empty path; 1/5 → sub-sample; 1024/1025/3000 → past the 512 sample window + multi-chunk.
    Integer[] lengths = {0, 1, 5, 1024, 1025, 3000};
    return Stream.of(lengths).map(length -> Arguments.of(length));
}

// Builds n finite doubles for the round-trip property tests.
//
// Every slot is first filled from a uniformly random 64-bit pattern drawn from rng; patterns that
// decode to NaN or an infinity are redrawn, so the result only ever holds finite values. Random
// bit patterns practically never hit the signed zeros or the exact extremes, so a fixed set of
// edge values is then written over the tail of the array. At most n / 2 slots are overwritten:
// tiny arrays stay (mostly) random, and n == 0 or n == 1 are left untouched.
//
// The random draw runs to completion before the overwrite, so the untouched prefix is exactly
// what the plain random fill would have produced for the same rng state.
private static double[] randomDoubles(int n, Random rng) {
    double[] values = new double[n];
    for (int i = 0; i < n; i++) {
        double candidate;
        do {
            candidate = Double.longBitsToDouble(rng.nextLong());
        } while (!Double.isFinite(candidate));
        values[i] = candidate;
    }

    // -0.0 leads the list so it is the first edge value kept when only a few slots are available.
    double[] edgeValues = {-0.0, 0.0, Double.MIN_VALUE, Double.MAX_VALUE};
    int planted = Math.min(edgeValues.length, n / 2);
    int tailStart = n - planted;
    for (int k = 0; k < planted; k++) {
        values[tailStart + k] = edgeValues[k];
    }
    return values;
}

// Builds n finite floats for the round-trip property tests.
//
// Every slot is first filled from a uniformly random 32-bit pattern drawn from rng; patterns that
// decode to NaN or an infinity are redrawn, so the result only ever holds finite values. Random
// bit patterns practically never hit the signed zeros or the exact extremes, so a fixed set of
// edge values is then written over the tail of the array. At most n / 2 slots are overwritten:
// tiny arrays stay (mostly) random, and n == 0 or n == 1 are left untouched.
//
// The random draw runs to completion before the overwrite, so the untouched prefix is exactly
// what the plain random fill would have produced for the same rng state.
private static float[] randomFloats(int n, Random rng) {
    float[] values = new float[n];
    for (int i = 0; i < n; i++) {
        float candidate;
        do {
            candidate = Float.intBitsToFloat(rng.nextInt());
        } while (!Float.isFinite(candidate));
        values[i] = candidate;
    }

    // -0.0f leads the list so it is the first edge value kept when only a few slots are available.
    float[] edgeValues = {-0.0f, 0.0f, Float.MIN_VALUE, Float.MAX_VALUE};
    int planted = Math.min(edgeValues.length, n / 2);
    int tailStart = n - planted;
    for (int k = 0; k < planted; k++) {
        values[tailStart + k] = edgeValues[k];
    }
    return values;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package io.github.dfa1.vortex.writer.encode;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.reader.array.Array;
import io.github.dfa1.vortex.encoding.DTypes;
import io.github.dfa1.vortex.reader.decode.DecodeContext;
Expand All @@ -8,15 +10,21 @@
import io.github.dfa1.vortex.reader.ReadRegistry;
import io.github.dfa1.vortex.reader.decode.TestRegistry;
import io.github.dfa1.vortex.proto.DeltaMetadata;
import io.github.dfa1.vortex.proto.ScalarValue;
import io.github.dfa1.vortex.reader.decode.DeltaEncodingDecoder;
import io.github.dfa1.vortex.reader.decode.PrimitiveEncodingDecoder;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.stream.Stream;

import static org.assertj.core.api.Assertions.assertThat;
Expand Down Expand Up @@ -120,4 +128,142 @@ void encode_i64_metadata_deltasLen_isNonZero() throws Exception {
// Then
assertThat(meta.deltas_len()).isGreaterThan(0);
}

// Property test over seeded-random arrays for every accepted integer ptype and a spread of sizes.
// The hand-picked cases above all fit in a single FastLanes chunk (1024 values); the 1024/1025/3000
// sizes used here push through the multi-chunk loop, the cross-chunk transpose and the offset-slice
// tail, which is where most of the encode/decode logic lives and which small arrays never reach.
@ParameterizedTest(name = "{0}")
@MethodSource("randomIntegerArrays")
void encodeDecode_randomAcrossPtypesAndSizes_isLossless(String name, DType dtype, Object data, int n) {
    // Given
    EncodeResult encodeResult = ENCODER.encode(dtype, data, EncodeTestHelper.testCtx());
    DecodeContext decodeContext = DecodeTestHelper.toDecodeContext(encodeResult, n, dtype, REGISTRY);

    // When
    Array decoded = DECODER.decode(decodeContext);

    // Then — every element read back from the materialized segment equals the input element
    assertThat(decoded.length()).isEqualTo(n);
    MemorySegment materialized = decoded.materialize(Arena.ofAuto());
    PType ptype = ((DType.Primitive) dtype).ptype();
    var elementSize = ptype.byteSize();
    for (int idx = 0; idx < n; idx++) {
        long byteOffset = (long) idx * elementSize;
        // Signed and unsigned ptypes of the same width share a Java array type, so they share a case.
        switch (ptype) {
            case I8, U8 -> {
                byte[] expected = (byte[]) data;
                assertThat(materialized.get(ValueLayout.JAVA_BYTE, byteOffset))
                        .as("idx %d", idx).isEqualTo(expected[idx]);
            }
            case I16, U16 -> {
                short[] expected = (short[]) data;
                assertThat(materialized.get(PTypeIO.LE_SHORT, byteOffset))
                        .as("idx %d", idx).isEqualTo(expected[idx]);
            }
            case I32, U32 -> {
                int[] expected = (int[]) data;
                assertThat(materialized.get(PTypeIO.LE_INT, byteOffset))
                        .as("idx %d", idx).isEqualTo(expected[idx]);
            }
            case I64, U64 -> {
                long[] expected = (long[]) data;
                assertThat(materialized.get(PTypeIO.LE_LONG, byteOffset))
                        .as("idx %d", idx).isEqualTo(expected[idx]);
            }
            default -> throw new AssertionError(ptype);
        }
    }
}

@ParameterizedTest
@ValueSource(strings = {"I8", "I16", "I32", "I64", "U8", "U16", "U32", "U64"})
void accepts_everyIntegerPtype_isTrue(String ptype) {
    // Given — the non-nullable primitive dtype for the named ptype
    DType.Primitive candidate = new DType.Primitive(PType.valueOf(ptype), false);

    // When / Then — both the encoder and the decoder take it
    assertThat(ENCODER.accepts(candidate)).isTrue();
    assertThat(DECODER.accepts(candidate)).isTrue();
}

@Test
void accepts_nonIntegerOrNonPrimitive_isFalse() {
    // Given / When / Then — the encoder turns away a float dtype and a non-primitive dtype...
    boolean encoderTakesF64 = ENCODER.accepts(DTypes.F64);
    assertThat(encoderTakesF64).isFalse();
    boolean encoderTakesUtf8 = ENCODER.accepts(DTypes.UTF8);
    assertThat(encoderTakesUtf8).isFalse();

    // ...and so does the decoder
    boolean decoderTakesF32 = DECODER.accepts(DTypes.F32);
    assertThat(decoderTakesF32).isFalse();
    boolean decoderTakesBool = DECODER.accepts(DTypes.BOOL);
    assertThat(decoderTakesBool).isFalse();
}

@Test
void encode_signedI64_statsCarryMinAndMax() throws Exception {
    // Given — unordered input whose min and max both sit in the interior, so a broken scan
    // (for example a negated compare) ends up reporting a wrong element
    long[] unordered = {30L, -10L, 50L, 20L, 40L};

    // When
    EncodeResult encoded = ENCODER.encode(DTypes.I64, unordered, EncodeTestHelper.testCtx());

    // Then — signed stats live in the int64 scalar field and follow signed ordering
    assertThat(encoded.hasStats()).isTrue();
    ScalarValue min = scalar(encoded.statsMin());
    assertThat(min.int64_value()).isEqualTo(-10L);
    ScalarValue max = scalar(encoded.statsMax());
    assertThat(max.int64_value()).isEqualTo(50L);
}

@Test
void encode_unsignedU64_statsUseUnsignedOrderingAndField() throws Exception {
    // Given — -1L is the largest value under unsigned ordering but the smallest under signed
    // ordering, so an encoder that compared U64 values as signed would report it as the min.
    // This pins both the unsigned compare and the use of the unsigned scalar field for stats.
    long[] data = {1L, -1L, 5L};

    // When
    EncodeResult result = ENCODER.encode(DTypes.U64, data, EncodeTestHelper.testCtx());

    // Then — check presence first, as the signed test does, so missing stats fail here with a
    // clear assertion message instead of blowing up inside scalar(...)
    assertThat(result.hasStats()).isTrue();
    assertThat(scalar(result.statsMin()).uint64_value()).isEqualTo(1L);
    assertThat(scalar(result.statsMax()).uint64_value()).isEqualTo(-1L);
}

@Test
void encode_empty_hasNoStats() {
    // Given — an array with no elements
    long[] empty = new long[0];

    // When — the n>0 guard must suppress stats for an empty array
    EncodeResult encoded = ENCODER.encode(DTypes.I64, empty, EncodeTestHelper.testCtx());

    // Then — neither bound is present and the result reports no stats
    assertThat(encoded.statsMin()).isNull();
    assertThat(encoded.statsMax()).isNull();
    assertThat(encoded.hasStats()).isFalse();
}

// Decodes a serialized stats value: wraps the byte array in a heap segment and decodes a
// ScalarValue from its full extent.
private static ScalarValue scalar(byte[] bytes) throws java.io.IOException {
    MemorySegment wrapped = MemorySegment.ofArray(bytes);
    long length = wrapped.byteSize();
    return ScalarValue.decode(wrapped, 0, length);
}

// Argument source for the round-trip property test: one (label, dtype, data, length) tuple per
// integer dtype and size. The list is built eagerly from a single seeded Random, dtype-major and
// size-minor, so each case receives the same data on every run.
private static Stream<Arguments> randomIntegerArrays() {
    // 0 → empty path; 1/5 → sub-chunk; 1024 → exactly one chunk; 1025/3000 → multi-chunk + tail slice.
    int[] lengths = {0, 1, 5, 1024, 1025, 3000};
    DType[] integerDtypes = {
            DTypes.I8, DTypes.I16, DTypes.I32, DTypes.I64,
            DTypes.U8, DTypes.U16, DTypes.U32, DTypes.U64
    };
    Random seeded = new Random(0xD317A1L);

    List<Arguments> cases = new ArrayList<>(integerDtypes.length * lengths.length);
    for (DType integerDtype : integerDtypes) {
        PType ptype = ((DType.Primitive) integerDtype).ptype();
        for (int length : lengths) {
            String label = ptype + "/" + length;
            Object values = randomArray(ptype, length, seeded);
            cases.add(Arguments.of(label, integerDtype, values, length));
        }
    }
    return cases.stream();
}

// Returns a primitive array of length n filled from rng. The element type follows the ptype's
// width (byte[], short[], int[] or long[]); signed and unsigned ptypes of one width share a
// representation. Any ptype outside the eight integer types is a test-setup error.
private static Object randomArray(PType ptype, int n, Random rng) {
    switch (ptype) {
        case I8, U8 -> {
            byte[] bytes = new byte[n];
            rng.nextBytes(bytes);
            return bytes;
        }
        case I16, U16 -> {
            short[] shorts = new short[n];
            for (int idx = 0; idx < n; idx++) {
                // Narrow one int draw per element.
                shorts[idx] = (short) rng.nextInt();
            }
            return shorts;
        }
        case I32, U32 -> {
            int[] ints = new int[n];
            for (int idx = 0; idx < n; idx++) {
                ints[idx] = rng.nextInt();
            }
            return ints;
        }
        case I64, U64 -> {
            long[] longs = new long[n];
            for (int idx = 0; idx < n; idx++) {
                longs[idx] = rng.nextLong();
            }
            return longs;
        }
        default -> throw new AssertionError(ptype);
    }
}
}