Use of org.apache.parquet.internal.column.columnindex.ColumnIndex in project presto by prestodb.
In class TestColumnIndexBuilder, method testBuildUInt8.
/**
 * Builds column indexes for an INT32 column annotated as UINT_8 from per-page statistics and
 * checks the resulting boundary order, null counts, null pages, min/max values and the pages
 * selected by several filter predicates. Three page layouts are covered in turn: unordered,
 * ascending and descending.
 */
@Test
public void testBuildUInt8() {
    final Operators.IntColumn column = intColumn("test_col");
    final PrimitiveType uint8Type = Types.required(INT32).as(UINT_8).named("test_uint8");

    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(uint8Type, Integer.MAX_VALUE);
    // NOTE(review): the concrete-type check (instanceOf(IntColumnIndexBuilder.class)) is disabled
    // in this copy; the reason is not visible here -- confirm before re-enabling.

    // A builder that has not received any page yields no index.
    assertNull(indexBuilder.build());

    // ---- Unordered pages ----
    // Each stats(...) call describes one page; the expected null counts asserted below equal the
    // number of null arguments passed for that page.
    StatsBuilder statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(uint8Type, 4, 10));
    indexBuilder.add(statsBuilder.stats(uint8Type, 11, 17, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 2, 2, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 1, 0xFF));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0xEF, 0xFA));
    assertEquals(6, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    ColumnIndex index = indexBuilder.build();
    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 0, 1, 2, 3, 0, 0);
    assertCorrectNullPages(index, false, false, false, true, false, false);
    assertCorrectValues(index.getMaxValues(), 10, 17, 2, null, 0xFF, 0xFA);
    assertCorrectValues(index.getMinValues(), 4, 11, 2, null, 1, 0xEF);
    // Trailing arguments are the 0-based indexes of the pages each predicate is expected to keep.
    assertCorrectFiltering(index, eq(column, 2), 2, 4);
    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(index, gt(column, 2), 0, 1, 4, 5);
    assertCorrectFiltering(index, gtEq(column, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(index, lt(column, 0xEF), 0, 1, 2, 4);
    assertCorrectFiltering(index, ltEq(column, 0xEF), 0, 1, 2, 4, 5);
    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 0, 1, 4, 5);
    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)),
            0, 1, 2, 3, 4, 5);

    // ---- Ascending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(uint8Type, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0, 0, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0, 42, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 42, 0xEE));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0xEF, 0xFF));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    assertEquals(9, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(index, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(index.getMaxValues(), null, 0, 42, null, null, 0xEE, null, 0xFF, null);
    assertCorrectValues(index.getMinValues(), null, 0, 0, null, null, 42, null, 0xEF, null);
    assertCorrectFiltering(index, eq(column, 2), 2);
    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 4, 6, 8);
    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(index, notEq(column, null), 1, 2, 5, 7);
    assertCorrectFiltering(index, gt(column, 0xEE), 7);
    assertCorrectFiltering(index, gtEq(column, 0xEE), 5, 7);
    assertCorrectFiltering(index, lt(column, 42), 1, 2);
    assertCorrectFiltering(index, ltEq(column, 42), 1, 2, 5);
    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)),
            0, 1, 2, 3, 4, 5, 6, 7, 8);

    // ---- Descending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(uint8Type, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0xFF, 0xFF));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0xEF, 0xEA, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 0xEE, 42));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, null, null));
    indexBuilder.add(statsBuilder.stats(uint8Type, 41, 0));
    assertEquals(9, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(index, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(index.getMaxValues(), null, 0xFF, null, 0xEF, null, 0xEE, null, null, 41);
    assertCorrectValues(index.getMinValues(), null, 0xFF, null, 0xEA, null, 42, null, null, 0);
    assertCorrectFiltering(index, eq(column, 0xAB), 5);
    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 6, 7);
    assertCorrectFiltering(index, notEq(column, 0xFF), 0, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(index, notEq(column, null), 1, 3, 5, 8);
    // No page holds a value above 0xFF, so no page index is expected here.
    assertCorrectFiltering(index, gt(column, 0xFF));
    assertCorrectFiltering(index, gtEq(column, 0xFF), 1);
    assertCorrectFiltering(index, lt(column, 42), 8);
    assertCorrectFiltering(index, ltEq(column, 42), 5, 8);
    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)),
            0, 2, 3, 4, 5, 6, 7, 8);
}
Use of org.apache.parquet.internal.column.columnindex.ColumnIndex in project presto by prestodb.
In class TestColumnIndexBuilder, method testBuildBinaryDecimal.
/**
 * Builds column indexes for a BINARY column annotated as DECIMAL (precision 12, scale 2) from
 * per-page statistics and checks boundary order, null counts, null pages, min/max values and
 * the pages selected by several filter predicates. Three page layouts are covered in turn:
 * unordered, ascending and descending.
 */
@Test
public void testBuildBinaryDecimal() {
    final Operators.BinaryColumn column = binaryColumn("test_col");
    final PrimitiveType decimalType =
            Types.required(BINARY).as(DECIMAL).precision(12).scale(2).named("test_binary_decimal");

    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(decimalType, Integer.MAX_VALUE);
    // NOTE(review): the concrete-type check (instanceOf(BinaryColumnIndexBuilder.class)) is
    // disabled in this copy; the reason is not visible here -- confirm before re-enabling.

    // A builder that has not received any page yields no index.
    assertNull(indexBuilder.build());

    // ---- Unordered pages ----
    // Each stats(...) call describes one page; the expected null counts asserted below equal the
    // number of null arguments passed for that page.
    StatsBuilder statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-0.17"), decimalBinary("1234567890.12")));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-234.23"), null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-9999293.23"), decimalBinary("2348978.45")));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("87656273")));
    assertEquals(8, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    ColumnIndex index = indexBuilder.build();
    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 2, 0, 3, 3, 0, 4, 2, 0);
    assertCorrectNullPages(index, true, false, false, true, false, true, true, false);
    assertCorrectValues(index.getMaxValues(),
            null,
            decimalBinary("1234567890.12"),
            decimalBinary("-234.23"),
            null,
            decimalBinary("2348978.45"),
            null,
            null,
            decimalBinary("87656273"));
    assertCorrectValues(index.getMinValues(),
            null,
            decimalBinary("-0.17"),
            decimalBinary("-234.23"),
            null,
            decimalBinary("-9999293.23"),
            null,
            null,
            decimalBinary("87656273"));
    // Trailing arguments are the 0-based indexes of the pages each predicate is expected to keep.
    assertCorrectFiltering(index, eq(column, decimalBinary("0.0")), 1, 4);
    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 5, 6);
    assertCorrectFiltering(index, notEq(column, decimalBinary("87656273")), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, notEq(column, null), 1, 2, 4, 7);
    assertCorrectFiltering(index, gt(column, decimalBinary("2348978.45")), 1);
    assertCorrectFiltering(index, gtEq(column, decimalBinary("2348978.45")), 1, 4);
    assertCorrectFiltering(index, lt(column, decimalBinary("-234.23")), 4);
    assertCorrectFiltering(index, ltEq(column, decimalBinary("-234.23")), 2, 4);
    assertCorrectFiltering(index, userDefined(column, BinaryDecimalIsNullOrZeroUdp.class),
            0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, invert(userDefined(column, BinaryDecimalIsNullOrZeroUdp.class)),
            1, 2, 4, 7);

    // ---- Ascending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(decimalType, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-9999293.23"), decimalBinary("-234.23")));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-0.17"), decimalBinary("87656273")));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("87656273")));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("1234567890.12"), null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null));
    assertEquals(8, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 4, 0, 0, 2, 0, 2, 3, 3);
    assertCorrectNullPages(index, true, false, false, true, false, true, false, true);
    assertCorrectValues(index.getMaxValues(),
            null,
            decimalBinary("-234.23"),
            decimalBinary("87656273"),
            null,
            decimalBinary("87656273"),
            null,
            decimalBinary("1234567890.12"),
            null);
    assertCorrectValues(index.getMinValues(),
            null,
            decimalBinary("-9999293.23"),
            decimalBinary("-0.17"),
            null,
            decimalBinary("87656273"),
            null,
            decimalBinary("1234567890.12"),
            null);
    assertCorrectFiltering(index, eq(column, decimalBinary("87656273")), 2, 4);
    assertCorrectFiltering(index, eq(column, null), 0, 3, 5, 6, 7);
    assertCorrectFiltering(index, notEq(column, decimalBinary("87656273")), 0, 1, 2, 3, 5, 6, 7);
    assertCorrectFiltering(index, notEq(column, null), 1, 2, 4, 6);
    assertCorrectFiltering(index, gt(column, decimalBinary("87656273")), 6);
    assertCorrectFiltering(index, gtEq(column, decimalBinary("87656273")), 2, 4, 6);
    assertCorrectFiltering(index, lt(column, decimalBinary("-0.17")), 1);
    assertCorrectFiltering(index, ltEq(column, decimalBinary("-0.17")), 1, 2);
    assertCorrectFiltering(index, userDefined(column, BinaryDecimalIsNullOrZeroUdp.class),
            0, 2, 3, 5, 6, 7);
    assertCorrectFiltering(index, invert(userDefined(column, BinaryDecimalIsNullOrZeroUdp.class)),
            1, 2, 4, 6);

    // ---- Descending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(decimalType, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("1234567890.12"), null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("1234567890.12"), decimalBinary("87656273")));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("987656273"), decimalBinary("-0.17")));
    indexBuilder.add(statsBuilder.stats(decimalType, null, null));
    indexBuilder.add(statsBuilder.stats(decimalType, decimalBinary("-234.23"), decimalBinary("-9999293.23")));
    assertEquals(8, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 3, 2, 3, 4, 0, 0, 2, 0);
    assertCorrectNullPages(index, true, true, false, true, false, false, true, false);
    assertCorrectValues(index.getMaxValues(),
            null,
            null,
            decimalBinary("1234567890.12"),
            null,
            decimalBinary("1234567890.12"),
            decimalBinary("987656273"),
            null,
            decimalBinary("-234.23"));
    assertCorrectValues(index.getMinValues(),
            null,
            null,
            decimalBinary("1234567890.12"),
            null,
            decimalBinary("87656273"),
            decimalBinary("-0.17"),
            null,
            decimalBinary("-9999293.23"));
    assertCorrectFiltering(index, eq(column, decimalBinary("1234567890.12")), 2, 4);
    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 6);
    assertCorrectFiltering(index, notEq(column, decimalBinary("0.0")), 0, 1, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(index, notEq(column, null), 2, 4, 5, 7);
    // No page holds a value above the largest maximum, so no page index is expected here.
    assertCorrectFiltering(index, gt(column, decimalBinary("1234567890.12")));
    assertCorrectFiltering(index, gtEq(column, decimalBinary("1234567890.12")), 2, 4);
    assertCorrectFiltering(index, lt(column, decimalBinary("-0.17")), 7);
    assertCorrectFiltering(index, ltEq(column, decimalBinary("-0.17")), 5, 7);
    assertCorrectFiltering(index, userDefined(column, BinaryDecimalIsNullOrZeroUdp.class),
            0, 1, 2, 3, 5, 6);
    assertCorrectFiltering(index, invert(userDefined(column, BinaryDecimalIsNullOrZeroUdp.class)),
            2, 4, 5, 7);
}
Use of org.apache.parquet.internal.column.columnindex.ColumnIndex in project presto by prestodb.
In class TestColumnIndexBuilder, method testStaticBuildBinary.
/**
 * Creates a column index for a UTF8 BINARY column directly through the static
 * {@code ColumnIndexBuilder.build(...)} factory and checks that the boundary order, null
 * counts, null pages and min/max values read back from it match what was passed in.
 */
@Test
public void testStaticBuildBinary() {
    final ColumnIndex index = ColumnIndexBuilder.build(
            Types.required(BINARY).as(UTF8).named("test_binary_utf8"),
            BoundaryOrder.ASCENDING,
            // null-page flag per page
            asList(true, true, false, false, true, false, true, false),
            // null count per page
            asList(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L),
            // min value per page (null for null pages)
            toBBList(
                    null,
                    null,
                    stringBinary("Beeblebrox"),
                    stringBinary("Dent"),
                    null,
                    stringBinary("Jeltz"),
                    null,
                    stringBinary("Slartibartfast")),
            // max value per page (null for null pages)
            toBBList(
                    null,
                    null,
                    stringBinary("Dent"),
                    stringBinary("Dent"),
                    null,
                    stringBinary("Prefect"),
                    null,
                    stringBinary("Slartibartfast")));

    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectNullPages(index, true, true, false, false, true, false, true, false);
    assertCorrectValues(index.getMaxValues(),
            null,
            null,
            stringBinary("Dent"),
            stringBinary("Dent"),
            null,
            stringBinary("Prefect"),
            null,
            stringBinary("Slartibartfast"));
    assertCorrectValues(index.getMinValues(),
            null,
            null,
            stringBinary("Beeblebrox"),
            stringBinary("Dent"),
            null,
            stringBinary("Jeltz"),
            null,
            stringBinary("Slartibartfast"));
}
Use of org.apache.parquet.internal.column.columnindex.ColumnIndex in project presto by prestodb.
In class TestColumnIndexBuilder, method testStaticBuildInt32.
/**
 * Creates a column index for a plain INT32 column directly through the static
 * {@code ColumnIndexBuilder.build(...)} factory and checks that the boundary order, null
 * counts, null pages and min/max values read back from it match what was passed in.
 */
@Test
public void testStaticBuildInt32() {
    // The list handed in as "min" holds larger numbers than the one handed in as "max";
    // the assertions below only check that both lists are returned as supplied.
    final ColumnIndex index = ColumnIndexBuilder.build(
            Types.required(INT32).named("test_int32"),
            BoundaryOrder.DESCENDING,
            // null-page flag per page
            asList(false, false, false, true, true, true),
            // null count per page
            asList(0L, 10L, 0L, 3L, 5L, 7L),
            // min value per page (null for null pages)
            toBBList(10, 8, 6, null, null, null),
            // max value per page (null for null pages)
            toBBList(9, 7, 5, null, null, null));

    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 0, 10, 0, 3, 5, 7);
    assertCorrectNullPages(index, false, false, false, true, true, true);
    assertCorrectValues(index.getMaxValues(), 9, 7, 5, null, null, null);
    assertCorrectValues(index.getMinValues(), 10, 8, 6, null, null, null);
}
Use of org.apache.parquet.internal.column.columnindex.ColumnIndex in project presto by prestodb.
In class TestColumnIndexBuilder, method testBuildBoolean.
/**
 * Builds column indexes for a BOOLEAN column from per-page statistics and checks boundary
 * order, null counts, null pages, min/max values and the pages selected by several filter
 * predicates. Three page layouts are covered in turn: unordered, ascending and descending.
 */
@Test
public void testBuildBoolean() {
    final Operators.BooleanColumn column = booleanColumn("test_col");
    final PrimitiveType booleanType = Types.required(BOOLEAN).named("test_boolean");

    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(booleanType, Integer.MAX_VALUE);
    // NOTE(review): the concrete-type check (instanceOf(BooleanColumnIndexBuilder.class)) is
    // disabled in this copy; the reason is not visible here -- confirm before re-enabling.

    // A builder that has not received any page yields no index.
    assertNull(indexBuilder.build());

    // ---- Unordered pages ----
    // A fresh builder is obtained for this scenario rather than reusing the one built above.
    // Each stats(...) call describes one page; the expected null counts asserted below equal the
    // number of null arguments passed for that page.
    indexBuilder = ColumnIndexBuilder.getBuilder(booleanType, Integer.MAX_VALUE);
    StatsBuilder statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(booleanType, false, true));
    indexBuilder.add(statsBuilder.stats(booleanType, true, false, null));
    indexBuilder.add(statsBuilder.stats(booleanType, true, true, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, false, false));
    assertEquals(5, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    ColumnIndex index = indexBuilder.build();
    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 0, 1, 2, 3, 0);
    assertCorrectNullPages(index, false, false, false, true, false);
    assertCorrectValues(index.getMaxValues(), true, true, true, null, false);
    assertCorrectValues(index.getMinValues(), false, false, true, null, false);
    // Trailing arguments are the 0-based indexes of the pages each predicate is expected to keep.
    assertCorrectFiltering(index, eq(column, true), 0, 1, 2);
    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
    assertCorrectFiltering(index, notEq(column, true), 0, 1, 2, 3, 4);
    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4);
    assertCorrectFiltering(index, userDefined(column, BooleanIsTrueOrNull.class), 0, 1, 2, 3);
    assertCorrectFiltering(index, invert(userDefined(column, BooleanIsTrueOrNull.class)), 0, 1, 4);

    // ---- Ascending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(booleanType, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(booleanType, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, false, false));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, false, true, null));
    indexBuilder.add(statsBuilder.stats(booleanType, false, true, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null));
    assertEquals(7, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 2, 0, 3, 4, 1, 2, 3);
    assertCorrectNullPages(index, true, false, true, true, false, false, true);
    assertCorrectValues(index.getMaxValues(), null, false, null, null, true, true, null);
    assertCorrectValues(index.getMinValues(), null, false, null, null, false, false, null);
    assertCorrectFiltering(index, eq(column, true), 4, 5);
    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, notEq(column, true), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, notEq(column, null), 1, 4, 5);
    assertCorrectFiltering(index, userDefined(column, BooleanIsTrueOrNull.class), 0, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, invert(userDefined(column, BooleanIsTrueOrNull.class)), 1, 4, 5);

    // ---- Descending pages (null-only pages interleaved) ----
    indexBuilder = ColumnIndexBuilder.getBuilder(booleanType, Integer.MAX_VALUE);
    statsBuilder = new StatsBuilder();
    indexBuilder.add(statsBuilder.stats(booleanType, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, true, true));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, true, false, null));
    indexBuilder.add(statsBuilder.stats(booleanType, false, false, null, null));
    indexBuilder.add(statsBuilder.stats(booleanType, null, null, null));
    assertEquals(7, indexBuilder.getPageCount());
    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());

    index = indexBuilder.build();
    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
    assertCorrectNullCounts(index, 2, 0, 3, 4, 1, 2, 3);
    assertCorrectNullPages(index, true, false, true, true, false, false, true);
    assertCorrectValues(index.getMaxValues(), null, true, null, null, true, false, null);
    assertCorrectValues(index.getMinValues(), null, true, null, null, false, false, null);
    assertCorrectFiltering(index, eq(column, true), 1, 4);
    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, notEq(column, true), 0, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, notEq(column, null), 1, 4, 5);
    assertCorrectFiltering(index, userDefined(column, BooleanIsTrueOrNull.class), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(index, invert(userDefined(column, BooleanIsTrueOrNull.class)), 4, 5);
}
Aggregations