From a842b97d10831f01030935844d50743a7931c3f1 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 16:04:24 +0200 Subject: [PATCH 1/2] =?UTF-8?q?test(reader):=20kill=20remaining=20bounds?= =?UTF-8?q?=20survivors=20=E2=80=94=20reader=20to=2099%?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the four killable mutation survivors, leaving only one provably equivalent mutant. - Trailer postscript-length boundary (`postscriptLen > bodyBytes`): TrailerLengthBoundaryTest pins the exact-fill edge — postscriptLen == bodyBytes parses, one over throws. - FlatSegmentDecoder buffer-offset walk + unknown-encoding fallback: FlatSegmentDecoderDecodeTest decodes a flat segment whose single buffer has non-zero padding through an allow-unknown registry. The +padding accumulation must run (a subtraction slices at a negative offset) and the UnknownArrayNode fallback must produce an UnknownArray — previously a NO_COVERAGE branch. - Layout metadata-size cap: two cases in PostscriptParserParseBlobsBoundsTest build metadata of exactly MAX_LAYOUT_METADATA_BYTES (parses) and one over (throws). Costs a 4 MiB allocation, acceptable for a unit test. Reader 95% -> 99% (105/106), test strength 99%, no-coverage 0. The lone survivor is checkBlobBounds `length < 0`: unreachable since blob length is a u32 off the wire, so it is a genuine equivalent mutant, not a test gap. Co-Authored-By: Claude Opus 4.8 --- .../reader/FlatSegmentDecoderDecodeTest.java | 76 +++++++++++++++++++ .../PostscriptParserParseBlobsBoundsTest.java | 35 +++++++++ .../reader/TrailerLengthBoundaryTest.java | 65 ++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java create mode 100644 reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java new file mode 100644 index 00000000..5666e058 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java @@ -0,0 +1,76 @@ +package io.github.dfa1.vortex.reader; + +import com.google.flatbuffers.FlatBufferBuilder; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.fbs.ArrayNode; +import io.github.dfa1.vortex.fbs.Buffer; +import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.UnknownArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/// Successful flat-segment decode path — complements [FlatSegmentBoundsSecurityTest] (which only +/// drives the rejection paths). A buffer descriptor with non-zero padding exercises the offset +/// walk, and an unknown encoding id exercises the `UnknownArrayNode` fallback through an +/// allow-unknown registry. Together these pin two otherwise-untested spots: +/// - the `dataOffset += padding` accumulation: with padding > 0, flipping `+=` to `-=` slices at a +/// negative offset and fails, so a clean decode proves the addition. +/// - the `orElseGet(() -> new UnknownArrayNode(...))` fallback: returning `null` there yields a +/// null node and the decode would not produce an `UnknownArray`. +class FlatSegmentDecoderDecodeTest { + + private static final ValueLayout.OfInt LE_INT = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + private static final DType DTYPE = new DType.Primitive(PType.I32, false); + + @Test + void decode_unknownEncodingWithBufferPadding_returnsUnknownArray() { + ReadRegistry registry = ReadRegistry.builder().allowUnknown().build(); + FlatSegmentDecoder sut = new FlatSegmentDecoder(registry); + + try (Arena arena = Arena.ofConfined()) { + // Given — a flat segment whose single buffer carries 8 bytes of leading padding and + // an unrecognised encoding id. The 8 pad bytes are the entire buffer-data region + // (buffer length 0), so the decoder must advance dataOffset by +8 to slice it; a + // subtraction would slice at offset -8. + int padding = 8; + byte[] fb = arrayFlatBufferOneBuffer(padding, 0L); + MemorySegment seg = arena.allocate((long) padding + fb.length + 4); + MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, padding, fb.length); + seg.set(LE_INT, padding + fb.length, fb.length); + + // When — encoding index 0 maps to an id no decoder handles + Array result = sut.decode(seg, List.of("vortex.nonexistent"), DTYPE, 0, arena); + + // Then — the allow-unknown path produced an UnknownArray (proves both the +padding + // walk and the UnknownArrayNode fallback ran) + assertThat(result).isInstanceOf(UnknownArray.class); + } + } + + /// Builds an `Array` FlatBuffer with a single buffer descriptor of the given padding/length. + private static byte[] arrayFlatBufferOneBuffer(int padding, long length) { + FlatBufferBuilder b = new FlatBufferBuilder(); + + int rootChildren = ArrayNode.createChildrenVector(b, new int[0]); + int rootBuffers = ArrayNode.createBuffersVector(b, new int[]{0}); + int root = ArrayNode.createArrayNode(b, 0, 0, rootChildren, rootBuffers, 0); + + io.github.dfa1.vortex.fbs.Array.startBuffersVector(b, 1); + Buffer.createBuffer(b, padding, 0, 0, length); + int buffers = b.endVector(); + + int array = io.github.dfa1.vortex.fbs.Array.createArray(b, root, buffers); + io.github.dfa1.vortex.fbs.Array.finishArrayBuffer(b, array); + return b.sizedByteArray(); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java index 2b2b1cfd..3a1cbbe5 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java @@ -131,6 +131,32 @@ void parseBlobs_decimalScale_abovePrecision_throws() { .hasMessageContaining("scale"); } + // ── Layout metadata-size bound: metadata.remaining() > MAX_LAYOUT_METADATA_BYTES ── + + @Test + void parseBlobs_layoutMetadata_atLimit_parses() { + // Given — metadata of exactly MAX_LAYOUT_METADATA_BYTES (the largest allowed). `> MAX` is + // false at the limit, so it must parse; kills `>` relaxed to `>=`, which would reject it. + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayoutWithMetadata(PostscriptParser.MAX_LAYOUT_METADATA_BYTES); + + // When / Then + assertThatCode(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .doesNotThrowAnyException(); + } + + @Test + void parseBlobs_layoutMetadata_oneOverLimit_throws() { + // Given — one byte past the cap + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayoutWithMetadata(PostscriptParser.MAX_LAYOUT_METADATA_BYTES + 1); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("metadata size"); + } + // ── helpers ────────────────────────────────────────────────────────────────── /// Parses a dtype blob through the full parseBlobs path, paired with a minimal valid @@ -165,6 +191,15 @@ private static ByteBuffer flatLayout(int encodingIdx) { return slice(fbb); } + private static ByteBuffer flatLayoutWithMetadata(int metadataBytes) { + var fbb = new FlatBufferBuilder(metadataBytes + 128); + int meta = Layout.createMetadataVector(fbb, new byte[metadataBytes]); + int segV = Layout.createSegmentsVector(fbb, new long[]{0}); + int off = Layout.createLayout(fbb, 0, 1L, meta, 0, segV); + Layout.finishLayoutBuffer(fbb, off); + return slice(fbb); + } + private static ByteBuffer nestedLayout(int depth) { var fbb = new FlatBufferBuilder(depth * 32 + 64); int segV = Layout.createSegmentsVector(fbb, new long[]{0}); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java new file mode 100644 index 00000000..a2592478 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java @@ -0,0 +1,65 @@ +package io.github.dfa1.vortex.reader; + +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.core.VortexFormat; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Boundary coverage for [Trailer#parse]'s postscript-length guard `postscriptLen > bodyBytes`. +/// The largest legal postscript fills the entire file body (`postscriptLen == bodyBytes`) and must +/// parse; one byte more overruns the body and must throw. Relaxing `>` to `>=` would reject the +/// exact-fill case — this pins that edge. +class TrailerLengthBoundaryTest { + + private static final ValueLayout.OfShort LE_SHORT = + ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + /// Builds a well-formed 8-byte trailer (`version | postscriptLen | magic`) with the given + /// postscript length, so only the length-vs-body check is under test. + private static MemorySegment trailer(int postscriptLen) { + MemorySegment seg = MemorySegment.ofArray(new byte[VortexFormat.TRAILER_SIZE]); + seg.set(LE_SHORT, 0, (short) VortexFormat.VERSION); + seg.set(LE_SHORT, 2, (short) postscriptLen); + MemorySegment.copy(VortexFormat.MAGIC, 0, seg, 4, VortexFormat.MAGIC_SIZE); + return seg; + } + + @Test + void parse_postscriptLengthEqualsBody_parses() { + // Given — postscript exactly fills the body: postscriptLen == bodyBytes (the upper edge) + MemorySegment sut = trailer(64); + + // When + Trailer result = Trailer.parse(sut, 64); + + // Then — accepted, length preserved (kills `>` relaxed to `>=`) + assertThat(result.postscriptLen()).isEqualTo(64); + } + + @Test + void parse_postscriptLengthOneOverBody_throws() { + // Given — postscript one byte past the body + MemorySegment sut = trailer(65); + + // When / Then + assertThatThrownBy(() -> Trailer.parse(sut, 64)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("exceeds file body size"); + } + + @Test + void parse_postscriptLengthWellWithinBody_parses() { + // Given — a comfortably in-range postscript, as a sanity anchor for the boundary cases + MemorySegment sut = trailer(10); + + // When / Then + assertThatCode(() -> Trailer.parse(sut, 1_000)).doesNotThrowAnyException(); + } +} From 99cf5107a048213bbd0dadcd586abb0db3a169f7 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 16:13:33 +0200 Subject: [PATCH 2/2] refactor(reader): drop dead `length < 0` blob check; reuse PTypeIO LE layouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups: - checkBlobBounds: drop the `length < 0` clause. Every caller passes a u32-masked PostscriptSegment.length() (`bb.getInt(..) & 0xFFFFFFFFL`), always >= 0, so the branch is unreachable — the equivalent mutant pitest flagged. Removing it takes the reader file-structure classes to a 100% mutation score (the only non-killed mutation is a depth-recursion timeout, which pitest counts as detected). The `offset < 0` check stays — offset is u64 and can read back negative. - Tests: reuse the public PTypeIO.LE_INT / LE_SHORT layout constants instead of re-declaring them locally, and inline the single-use I32 dtype (no shared DType constant exists — it is duplicated per test file). Co-Authored-By: Claude Opus 4.8 --- .../github/dfa1/vortex/reader/PostscriptParser.java | 10 ++++++---- .../vortex/reader/FlatSegmentDecoderDecodeTest.java | 11 +++-------- .../dfa1/vortex/reader/TrailerLengthBoundaryTest.java | 6 +----- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java index 51549be4..40c53a30 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java @@ -90,10 +90,12 @@ static void validateSegmentSpecs(List specs, long fileSize) { } private static void checkBlobBounds(String name, long offset, long length, long fileSize) { - // Same overflow-safe range form as IoBounds.checkRange (no redundant `offset > fileSize` - // clause: length >= 0 makes it implied by the final comparison). Keeps the blob-named - // message that checkRange's generic text would lose. - if (offset < 0 || length < 0 || length > fileSize - offset) { + // Overflow-safe containment in [0, fileSize], keeping the blob-named message that + // IoBounds.checkRange's generic text would lose. Two clauses checkRange carries are + // omitted because they are unreachable here: every caller passes a u32-masked + // PostscriptSegment.length() (always >= 0, so no `length < 0` check), which in turn makes + // an `offset > fileSize` check redundant — it is already implied by the final comparison. + if (offset < 0 || length > fileSize - offset) { throw new VortexException( "postscript " + name + " blob out of bounds: offset=" + offset + " length=" + length + " fileSize=" + fileSize); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java index 5666e058..e1d6d06a 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java @@ -11,10 +11,9 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.nio.ByteOrder; import java.util.List; +import static io.github.dfa1.vortex.encoding.PTypeIO.LE_INT; import static org.assertj.core.api.Assertions.assertThat; /// Successful flat-segment decode path — complements [FlatSegmentBoundsSecurityTest] (which only @@ -27,11 +26,6 @@ /// null node and the decode would not produce an `UnknownArray`. class FlatSegmentDecoderDecodeTest { - private static final ValueLayout.OfInt LE_INT = - ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - - private static final DType DTYPE = new DType.Primitive(PType.I32, false); - @Test void decode_unknownEncodingWithBufferPadding_returnsUnknownArray() { ReadRegistry registry = ReadRegistry.builder().allowUnknown().build(); @@ -49,7 +43,8 @@ void decode_unknownEncodingWithBufferPadding_returnsUnknownArray() { seg.set(LE_INT, padding + fb.length, fb.length); // When — encoding index 0 maps to an id no decoder handles - Array result = sut.decode(seg, List.of("vortex.nonexistent"), DTYPE, 0, arena); + Array result = sut.decode(seg, List.of("vortex.nonexistent"), + new DType.Primitive(PType.I32, false), 0, arena); // Then — the allow-unknown path produced an UnknownArray (proves both the +padding // walk and the UnknownArrayNode fallback ran) diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java index a2592478..9bef76cc 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java @@ -5,9 +5,8 @@ import org.junit.jupiter.api.Test; import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.nio.ByteOrder; +import static io.github.dfa1.vortex.encoding.PTypeIO.LE_SHORT; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -18,9 +17,6 @@ /// exact-fill case — this pins that edge. class TrailerLengthBoundaryTest { - private static final ValueLayout.OfShort LE_SHORT = - ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - /// Builds a well-formed 8-byte trailer (`version | postscriptLen | magic`) with the given /// postscript length, so only the length-vs-body check is under test. private static MemorySegment trailer(int postscriptLen) {