From bbd32281a326c5f61d0ed050b54aaabd3a58b2a7 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 21:31:12 +0200 Subject: [PATCH] test(reader): bump HTTP fixtures to v0.75.0; smoke-test all encodings Bump the S3 compatibility bucket from v0.72.0 to v0.75.0 and add a parameterized smoke test that opens and decodes every published encoding fixture, catching upstream wire-format drift across the full set. zstd.vortex is now dictionary-compressed, which the pure-Java decoder cannot handle (aircompressor has no Zstd dictionary support); assert the fail-fast VortexException in a dedicated test instead of decoding it. Tracked by #104 / airlift/aircompressor#119. masked/patched/variant fixtures are still unpublished at v0.75.0 (decoders already exist), so that test stays @Disabled with an updated reason and a corrected, honest method name. Co-Authored-By: Claude Opus 4.8 --- .../vortex/reader/VortexHttpReaderIT.java | 70 +++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/VortexHttpReaderIT.java b/reader/src/test/java/io/github/dfa1/vortex/reader/VortexHttpReaderIT.java index 3a9db8e2..da3c087b 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/VortexHttpReaderIT.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/VortexHttpReaderIT.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; import io.github.dfa1.vortex.reader.array.ListArray; import io.github.dfa1.vortex.reader.array.ListViewArray; @@ -15,6 +16,7 @@ import java.util.List; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /// Integration test: reads real Vortex files from the public S3 compatibility bucket /// via HTTP Range requests and validates structure + data. @@ -23,7 +25,7 @@ @Tag("integration") class VortexHttpReaderIT { - private static final URI BASE = URI.create("https://vortex-compat-fixtures.s3.amazonaws.com/v0.72.0/arrays/"); + private static final URI BASE = URI.create("https://vortex-compat-fixtures.s3.amazonaws.com/v0.75.0/arrays/"); private static final URI TPCH_LINEITEM = BASE.resolve("tpch_lineitem.compact.vortex"); @@ -78,13 +80,71 @@ void scan_forVortex_decodesAllRows() throws Exception { assertThat(totalRows).isGreaterThan(0); } - // vortex.masked / vortex.patched / vortex.variant: IDs known, decoders not yet implemented. - // No S3 fixture exists in v0.72.0 — enable these tests once fixtures are published. + // Smoke test: every published encoding fixture must open and decode all its rows. + // Catches upstream wire-format drift across the full encoding set in one place; the + // structural tests below (for/list/listview) assert shape for the trickier layouts. + // Excludes the multi-MB clickbench/tpch fixtures (covered by dedicated tests) to keep + // this fast over HTTP. + @ParameterizedTest + @ValueSource(strings = { + "alp.vortex", "alprd.vortex", "bitpacked.vortex", "booleans.vortex", "bytebool.vortex", + "chunked.vortex", "constant.vortex", "datetime.vortex", "datetimeparts.vortex", + "decimal.vortex", "decimal_byte_parts.vortex", "dict.vortex", "fixed_size_list.vortex", + "fsst.vortex", "null.vortex", "pco.vortex", "primitives.vortex", "rle.vortex", + "runend.vortex", "sequence.vortex", "sparse.vortex", "struct_nested.vortex", + "varbin.vortex", "varbinview.vortex", "zigzag.vortex" + // zstd.vortex omitted: dictionary-compressed, unsupported by the pure-Java decoder + // (see scan_zstdVortex_rejectsDictionaryCompression). + }) + void scan_publishedFixture_decodesAllRows(String fixture) throws Exception { + // Given + URI uri = BASE.resolve(fixture); + + // When + long totalRows = 0; + try (var sut = VortexHttpReader.open(uri); + var iter = sut.scan(ScanOptions.all())) { + while (iter.hasNext()) { + try (var c = iter.next()) { + totalRows += c.rowCount(); + } + } + } + + // Then + assertThat(totalRows).isGreaterThan(0); + } + + // The published zstd.vortex fixture is dictionary-compressed; the pure-Java decoder has no + // Zstd dictionary support and must fail fast with a clear message rather than mis-decode. + // Tracked by https://github.com/dfa1/vortex-java/issues/104 (upstream airlift/aircompressor#119). + @Test + void scan_zstdVortex_rejectsDictionaryCompression() throws Exception { + // Given + URI uri = BASE.resolve("zstd.vortex"); + + // When / Then + try (var sut = VortexHttpReader.open(uri); + var iter = sut.scan(ScanOptions.all())) { + assertThatThrownBy(() -> { + while (iter.hasNext()) { + try (var c = iter.next()) { + c.rowCount(); + } + } + }) + .isInstanceOf(VortexException.class) + .hasMessageContaining("dictionary-compressed Zstd segments are not supported"); + } + } + + // vortex.masked / vortex.patched / vortex.variant: decoders implemented, but no S3 fixture + // is published (still absent at v0.75.0) — enable this test once fixtures exist upstream. - @Disabled("no S3 fixture in v0.72.0") + @Disabled("no S3 fixture through v0.75.0") @ParameterizedTest @ValueSource(strings = {"masked.vortex", "patched.vortex", "variant.vortex"}) - void scan_unimplementedEncoding_decodesAllRows(String fixture) throws Exception { + void scan_unpublishedFixture_decodesAllRows(String fixture) throws Exception { // Given URI uri = BASE.resolve(fixture);