diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java index 51549be4..40c53a30 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java @@ -90,10 +90,12 @@ static void validateSegmentSpecs(List specs, long fileSize) { } private static void checkBlobBounds(String name, long offset, long length, long fileSize) { - // Same overflow-safe range form as IoBounds.checkRange (no redundant `offset > fileSize` - // clause: length >= 0 makes it implied by the final comparison). Keeps the blob-named - // message that checkRange's generic text would lose. - if (offset < 0 || length < 0 || length > fileSize - offset) { + // Overflow-safe containment in [0, fileSize], keeping the blob-named message that + // IoBounds.checkRange's generic text would lose. Two clauses checkRange carries are + // omitted because they are unreachable here: every caller passes a u32-masked + // PostscriptSegment.length() (always >= 0, so no `length < 0` check), which in turn makes + // an `offset > fileSize` check redundant — it is already implied by the final comparison. + if (offset < 0 || length > fileSize - offset) { throw new VortexException( "postscript " + name + " blob out of bounds: offset=" + offset + " length=" + length + " fileSize=" + fileSize); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java new file mode 100644 index 00000000..e1d6d06a --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentDecoderDecodeTest.java @@ -0,0 +1,71 @@ +package io.github.dfa1.vortex.reader; + +import com.google.flatbuffers.FlatBufferBuilder; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.fbs.ArrayNode; +import io.github.dfa1.vortex.fbs.Buffer; +import io.github.dfa1.vortex.reader.array.Array; +import io.github.dfa1.vortex.reader.array.UnknownArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.util.List; + +import static io.github.dfa1.vortex.encoding.PTypeIO.LE_INT; +import static org.assertj.core.api.Assertions.assertThat; + +/// Successful flat-segment decode path — complements [FlatSegmentBoundsSecurityTest] (which only +/// drives the rejection paths). A buffer descriptor with non-zero padding exercises the offset +/// walk, and an unknown encoding id exercises the `UnknownArrayNode` fallback through an +/// allow-unknown registry. Together these pin two otherwise-untested spots: +/// - the `dataOffset += padding` accumulation: with padding > 0, flipping `+=` to `-=` slices at a +/// negative offset and fails, so a clean decode proves the addition. +/// - the `orElseGet(() -> new UnknownArrayNode(...))` fallback: returning `null` there yields a +/// null node and the decode would not produce an `UnknownArray`. +class FlatSegmentDecoderDecodeTest { + + @Test + void decode_unknownEncodingWithBufferPadding_returnsUnknownArray() { + ReadRegistry registry = ReadRegistry.builder().allowUnknown().build(); + FlatSegmentDecoder sut = new FlatSegmentDecoder(registry); + + try (Arena arena = Arena.ofConfined()) { + // Given — a flat segment whose single buffer carries 8 bytes of leading padding and + // an unrecognised encoding id. The 8 pad bytes are the entire buffer-data region + // (buffer length 0), so the decoder must advance dataOffset by +8 to slice it; a + // subtraction would slice at offset -8. + int padding = 8; + byte[] fb = arrayFlatBufferOneBuffer(padding, 0L); + MemorySegment seg = arena.allocate((long) padding + fb.length + 4); + MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, padding, fb.length); + seg.set(LE_INT, padding + fb.length, fb.length); + + // When — encoding index 0 maps to an id no decoder handles + Array result = sut.decode(seg, List.of("vortex.nonexistent"), + new DType.Primitive(PType.I32, false), 0, arena); + + // Then — the allow-unknown path produced an UnknownArray (proves both the +padding + // walk and the UnknownArrayNode fallback ran) + assertThat(result).isInstanceOf(UnknownArray.class); + } + } + + /// Builds an `Array` FlatBuffer with a single buffer descriptor of the given padding/length. + private static byte[] arrayFlatBufferOneBuffer(int padding, long length) { + FlatBufferBuilder b = new FlatBufferBuilder(); + + int rootChildren = ArrayNode.createChildrenVector(b, new int[0]); + int rootBuffers = ArrayNode.createBuffersVector(b, new int[]{0}); + int root = ArrayNode.createArrayNode(b, 0, 0, rootChildren, rootBuffers, 0); + + io.github.dfa1.vortex.fbs.Array.startBuffersVector(b, 1); + Buffer.createBuffer(b, padding, 0, 0, length); + int buffers = b.endVector(); + + int array = io.github.dfa1.vortex.fbs.Array.createArray(b, root, buffers); + io.github.dfa1.vortex.fbs.Array.finishArrayBuffer(b, array); + return b.sizedByteArray(); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java index 2b2b1cfd..3a1cbbe5 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java @@ -131,6 +131,32 @@ void parseBlobs_decimalScale_abovePrecision_throws() { .hasMessageContaining("scale"); } + // ── Layout metadata-size bound: metadata.remaining() > MAX_LAYOUT_METADATA_BYTES ── + + @Test + void parseBlobs_layoutMetadata_atLimit_parses() { + // Given — metadata of exactly MAX_LAYOUT_METADATA_BYTES (the largest allowed). `> MAX` is + // false at the limit, so it must parse; kills `>` relaxed to `>=`, which would reject it. + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayoutWithMetadata(PostscriptParser.MAX_LAYOUT_METADATA_BYTES); + + // When / Then + assertThatCode(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .doesNotThrowAnyException(); + } + + @Test + void parseBlobs_layoutMetadata_oneOverLimit_throws() { + // Given — one byte past the cap + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayoutWithMetadata(PostscriptParser.MAX_LAYOUT_METADATA_BYTES + 1); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("metadata size"); + } + // ── helpers ────────────────────────────────────────────────────────────────── /// Parses a dtype blob through the full parseBlobs path, paired with a minimal valid @@ -165,6 +191,15 @@ private static ByteBuffer flatLayout(int encodingIdx) { return slice(fbb); } + private static ByteBuffer flatLayoutWithMetadata(int metadataBytes) { + var fbb = new FlatBufferBuilder(metadataBytes + 128); + int meta = Layout.createMetadataVector(fbb, new byte[metadataBytes]); + int segV = Layout.createSegmentsVector(fbb, new long[]{0}); + int off = Layout.createLayout(fbb, 0, 1L, meta, 0, segV); + Layout.finishLayoutBuffer(fbb, off); + return slice(fbb); + } + private static ByteBuffer nestedLayout(int depth) { var fbb = new FlatBufferBuilder(depth * 32 + 64); int segV = Layout.createSegmentsVector(fbb, new long[]{0}); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java new file mode 100644 index 00000000..9bef76cc --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/TrailerLengthBoundaryTest.java @@ -0,0 +1,61 @@ +package io.github.dfa1.vortex.reader; + +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.core.VortexFormat; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; + +import static io.github.dfa1.vortex.encoding.PTypeIO.LE_SHORT; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Boundary coverage for [Trailer#parse]'s postscript-length guard `postscriptLen > bodyBytes`. +/// The largest legal postscript fills the entire file body (`postscriptLen == bodyBytes`) and must +/// parse; one byte more overruns the body and must throw. Relaxing `>` to `>=` would reject the +/// exact-fill case — this pins that edge. +class TrailerLengthBoundaryTest { + + /// Builds a well-formed 8-byte trailer (`version | postscriptLen | magic`) with the given + /// postscript length, so only the length-vs-body check is under test. + private static MemorySegment trailer(int postscriptLen) { + MemorySegment seg = MemorySegment.ofArray(new byte[VortexFormat.TRAILER_SIZE]); + seg.set(LE_SHORT, 0, (short) VortexFormat.VERSION); + seg.set(LE_SHORT, 2, (short) postscriptLen); + MemorySegment.copy(VortexFormat.MAGIC, 0, seg, 4, VortexFormat.MAGIC_SIZE); + return seg; + } + + @Test + void parse_postscriptLengthEqualsBody_parses() { + // Given — postscript exactly fills the body: postscriptLen == bodyBytes (the upper edge) + MemorySegment sut = trailer(64); + + // When + Trailer result = Trailer.parse(sut, 64); + + // Then — accepted, length preserved (kills `>` relaxed to `>=`) + assertThat(result.postscriptLen()).isEqualTo(64); + } + + @Test + void parse_postscriptLengthOneOverBody_throws() { + // Given — postscript one byte past the body + MemorySegment sut = trailer(65); + + // When / Then + assertThatThrownBy(() -> Trailer.parse(sut, 64)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("exceeds file body size"); + } + + @Test + void parse_postscriptLengthWellWithinBody_parses() { + // Given — a comfortably in-range postscript, as a sanity anchor for the boundary cases + MemorySegment sut = trailer(10); + + // When / Then + assertThatCode(() -> Trailer.parse(sut, 1_000)).doesNotThrowAnyException(); + } +}