Search in sources :

Example 11 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildDouble.

/**
 * Exercises the column index builder obtained for a required DOUBLE column across three page
 * layouts whose expected boundary orders are UNORDERED, ASCENDING and DESCENDING. For each layout
 * the test checks the page count, the min/max size bookkeeping, null counts, null pages, the
 * min/max value lists, and the result of a series of filter predicates.
 */
@Test
public void testBuildDouble() {
    PrimitiveType type = Types.required(DOUBLE).named("test_double");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    // The factory is expected to return the double-specific builder; with no pages added, build() is expected to return null.
    assertThat(builder, instanceOf(DoubleColumnIndexBuilder.class));
    assertNull(builder.build());
    DoubleColumn col = doubleColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    // Scenario 1: six pages whose bounds follow no particular order.
    // Judging by the assertions below, each stats(...) call describes one page: the non-null
    // arguments supply the page's values and each null argument is counted as one null value.
    builder.add(sb.stats(type, -4.2, -4.1));
    builder.add(sb.stats(type, -11.7, 7.0, null));
    builder.add(sb.stats(type, 2.2, 2.2, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1.9, 2.32));
    builder.add(sb.stats(type, -21.0, 8.1));
    assertEquals(6, builder.getPageCount());
    // The builder's min/max size accounting must agree with what the StatsBuilder tracked.
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    // Only the page built from nulls alone (index 3) is expected to be flagged as a null page.
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), -4.1, 7.0, 2.2, null, 2.32, 8.1);
    assertCorrectValues(columnIndex.getMinValues(), -4.2, -11.7, 2.2, null, 1.9, -21.0);
    // The trailing int arguments are presumably the page indexes expected to match the filter
    // (assertCorrectFiltering is defined outside this excerpt).
    assertCorrectFiltering(columnIndex, eq(col, 0.0), 1, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    Set<Double> set1 = new HashSet<>();
    set1.add(0.0);
    set1.add(-4.2);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 2, 3, 4);
    // With null added to the set, the expected in() result also includes the pages that hold nulls.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 4, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.2), 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, -4.2), 1, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, -4.2), 0, 1, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, DoubleIsInteger.class), 1, 4, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5);
    // Scenario 2: nine pages; the non-null pages have non-decreasing bounds and are interleaved
    // with null-only pages. A fresh builder and StatsBuilder are used so the counters start over.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532.3, -345.2, null, null));
    builder.add(sb.stats(type, -234.7, -234.6, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -234.6, 2.99999));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 3.0, 42.83));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    // The interleaved null-only pages are not expected to disturb the detected ordering.
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345.2, -234.6, null, null, 2.99999, null, 42.83, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532.3, -234.7, null, null, -234.6, null, 3.0, null);
    assertCorrectFiltering(columnIndex, eq(col, 0.0), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    Set<Double> set2 = new HashSet<>();
    set2.add(0.0);
    set2.add(3.5);
    set2.add(-346.0);
    assertCorrectFiltering(columnIndex, in(col, set2), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 2, 3, 4, 6, 8);
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    // An explicit empty int[] states that no page is expected to match.
    assertCorrectFiltering(columnIndex, notIn(col, set2), new int[0]);
    assertCorrectFiltering(columnIndex, notEq(col, 0.0), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2.99999), 7);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.99999), 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, -234.6), 1, 2);
    assertCorrectFiltering(columnIndex, ltEq(col, -234.6), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, DoubleIsInteger.class), 1, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    // Scenario 3: nine pages; the non-null pages have non-increasing bounds, again interleaved
    // with null-only pages. Values are passed larger-first here, yet the expected min/max lists
    // below still hold the smaller and larger value of each page respectively.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532.3, 345.2));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234.7, 234.6, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 234.69, -2.99999));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3.0, -42.83));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532.3, null, 234.7, null, 234.69, null, null, -3.0);
    assertCorrectValues(columnIndex.getMinValues(), null, 345.2, null, 234.6, null, -2.99999, null, null, -42.83);
    assertCorrectFiltering(columnIndex, eq(col, 234.6), 3, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    Set<Double> set3 = new HashSet<>();
    set3.add(234.6);
    assertCorrectFiltering(columnIndex, in(col, set3), 3, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 6, 7, 8);
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 234.69), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, -2.99999), 8);
    assertCorrectFiltering(columnIndex, ltEq(col, -2.99999), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, DoubleIsInteger.class), 1, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : DoubleColumn(org.apache.parquet.filter2.predicate.Operators.DoubleColumn) PrimitiveType(org.apache.parquet.schema.PrimitiveType) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildDoubleZeroNaN.

/**
 * Covers two special cases for DOUBLE page statistics: signed zeros and NaN.
 * <p>
 * First run: pages whose bounds touch zero. The expected index reports {@code -0.0} as the
 * minimum where a page was fed {@code 0.0}, and {@code 0.0} as the maximum where a page was fed
 * {@code -0.0}.
 * <p>
 * Second run: one page carries {@code Double.NaN}; the builder is then expected to produce no
 * column index at all ({@code build()} returns {@code null}).
 */
@Test
public void testBuildDoubleZeroNaN() {
    final PrimitiveType doubleType = Types.required(DOUBLE).named("test_double");

    // --- signed zero handling ---
    final ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
    final StatsBuilder statsFactory = new StatsBuilder();
    zeroBuilder.add(statsFactory.stats(doubleType, -1.0, -0.0));
    zeroBuilder.add(statsFactory.stats(doubleType, 0.0, 1.0));
    zeroBuilder.add(statsFactory.stats(doubleType, 1.0, 100.0));
    final ColumnIndex zeroIndex = zeroBuilder.build();
    assertCorrectValues(zeroIndex.getMinValues(), -1.0, -0.0, 1.0);
    assertCorrectValues(zeroIndex.getMaxValues(), 0.0, 1.0, 100.0);

    // --- NaN handling: same StatsBuilder, brand-new index builder ---
    final ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(doubleType, Integer.MAX_VALUE);
    nanBuilder.add(statsFactory.stats(doubleType, -1.0, -0.0));
    nanBuilder.add(statsFactory.stats(doubleType, 0.0, Double.NaN));
    nanBuilder.add(statsFactory.stats(doubleType, 1.0, 100.0));
    assertNull(nanBuilder.build());
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.junit.Test)

Example 13 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildUInt8.

/**
 * Exercises the column index builder obtained for a required INT32 column annotated as UINT_8
 * across three page layouts whose expected boundary orders are UNORDERED, ASCENDING and
 * DESCENDING. Values range up to 0xFF (255). For each layout the test checks the page count,
 * the min/max size bookkeeping, null counts, null pages, the min/max value lists, and the
 * result of a series of filter predicates.
 */
@Test
public void testBuildUInt8() {
    PrimitiveType type = Types.required(INT32).as(UINT_8).named("test_uint8");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    // The UINT_8-annotated type is expected to map to the int builder; with no pages added, build() is expected to return null.
    assertThat(builder, instanceOf(IntColumnIndexBuilder.class));
    assertNull(builder.build());
    IntColumn col = intColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    // Scenario 1: six pages whose bounds follow no particular order.
    // Judging by the assertions below, each stats(...) call describes one page: the non-null
    // arguments supply the page's values and each null argument is counted as one null value.
    builder.add(sb.stats(type, 4, 10));
    builder.add(sb.stats(type, 11, 17, null));
    builder.add(sb.stats(type, 2, 2, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1, 0xFF));
    builder.add(sb.stats(type, 0xEF, 0xFA));
    assertEquals(6, builder.getPageCount());
    // The builder's min/max size accounting must agree with what the StatsBuilder tracked.
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    // Only the page built from nulls alone (index 3) is expected to be flagged as a null page.
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), 10, 17, 2, null, 0xFF, 0xFA);
    assertCorrectValues(columnIndex.getMinValues(), 4, 11, 2, null, 1, 0xEF);
    // The trailing int arguments are presumably the page indexes expected to match the filter
    // (assertCorrectFiltering is defined outside this excerpt).
    assertCorrectFiltering(columnIndex, eq(col, 2), 2, 4);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    Set<Integer> set1 = new HashSet<>();
    set1.add(2);
    assertCorrectFiltering(columnIndex, in(col, set1), 2, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 3, 5);
    // With null added to the set, the expected in() result also includes the pages that hold nulls.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 5);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, 0xEF), 0, 1, 2, 4);
    assertCorrectFiltering(columnIndex, ltEq(col, 0xEF), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
    // Scenario 2: nine pages; the non-null pages have non-decreasing bounds and are interleaved
    // with null-only pages. A fresh builder and StatsBuilder are used so the counters start over.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 0, 0, null, null));
    builder.add(sb.stats(type, 0, 42, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 42, 0xEE));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 0xEF, 0xFF));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    // The interleaved null-only pages are not expected to disturb the detected ordering.
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, 0, 42, null, null, 0xEE, null, 0xFF, null);
    assertCorrectValues(columnIndex.getMinValues(), null, 0, 0, null, null, 42, null, 0xEF, null);
    assertCorrectFiltering(columnIndex, eq(col, 2), 2);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    Set<Integer> set2 = new HashSet<>();
    set2.add(2);
    assertCorrectFiltering(columnIndex, in(col, set2), 2);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 4, 5, 6, 7, 8);
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 6, 8);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 5, 7);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 0xEE), 7);
    assertCorrectFiltering(columnIndex, gtEq(col, 0xEE), 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, 42), 1, 2);
    assertCorrectFiltering(columnIndex, ltEq(col, 42), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    // Scenario 3: nine pages; the non-null pages have non-increasing bounds, again interleaved
    // with null-only pages. Values are passed larger-first here, yet the expected min/max lists
    // below still hold the smaller and larger value of each page respectively.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 0xFF, 0xFF));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 0xEF, 0xEA, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 0xEE, 42));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 41, 0));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 0xFF, null, 0xEF, null, 0xEE, null, null, 41);
    assertCorrectValues(columnIndex.getMinValues(), null, 0xFF, null, 0xEA, null, 42, null, null, 0);
    assertCorrectFiltering(columnIndex, eq(col, 0xAB), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    Set<Integer> set3 = new HashSet<>();
    set3.add(0xAB);
    assertCorrectFiltering(columnIndex, in(col, set3), 5);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7, 8);
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 0xFF), 0, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    // No page indexes are listed: nothing is expected to be greater than 0xFF.
    assertCorrectFiltering(columnIndex, gt(col, 0xFF));
    assertCorrectFiltering(columnIndex, gtEq(col, 0xFF), 1);
    assertCorrectFiltering(columnIndex, lt(col, 42), 8);
    assertCorrectFiltering(columnIndex, ltEq(col, 42), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 2, 3, 4, 5, 6, 7, 8);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 14 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildInt32.

/**
 * Exercises the column index builder obtained for a required INT32 column across three page
 * layouts whose expected boundary orders are UNORDERED, ASCENDING and DESCENDING. For each layout
 * the test checks the page count, the min/max size bookkeeping, null counts, null pages, the
 * min/max value lists, and the result of a series of filter predicates.
 */
@Test
public void testBuildInt32() {
    PrimitiveType type = Types.required(INT32).named("test_int32");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    // The factory is expected to return the int-specific builder; with no pages added, build() is expected to return null.
    assertThat(builder, instanceOf(IntColumnIndexBuilder.class));
    assertNull(builder.build());
    IntColumn col = intColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    // Scenario 1: six pages whose bounds follow no particular order.
    // Judging by the assertions below, each stats(...) call describes one page: the non-null
    // arguments supply the page's values and each null argument is counted as one null value.
    builder.add(sb.stats(type, -4, 10));
    builder.add(sb.stats(type, -11, 7, null));
    builder.add(sb.stats(type, 2, 2, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1, 2));
    builder.add(sb.stats(type, -21, 8));
    assertEquals(6, builder.getPageCount());
    // The builder's min/max size accounting must agree with what the StatsBuilder tracked.
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    // Only the page built from nulls alone (index 3) is expected to be flagged as a null page.
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), 10, 7, 2, null, 2, 8);
    assertCorrectValues(columnIndex.getMinValues(), -4, -11, 2, null, 1, -21);
    // The trailing int arguments are presumably the page indexes expected to match the filter
    // (assertCorrectFiltering is defined outside this excerpt).
    assertCorrectFiltering(columnIndex, eq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    Set<Integer> set1 = new HashSet<>();
    set1.add(2);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 3);
    // With null added to the set, the expected in() result also includes the pages that hold nulls.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5);
    // An explicit empty int[] states that no page is expected to match.
    assertCorrectFiltering(columnIndex, notIn(col, set1), new int[0]);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, 2), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 0, 1, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
    // Scenario 2: nine pages; the non-null pages have non-decreasing bounds and are interleaved
    // with null-only pages. A fresh builder and StatsBuilder are used so the counters start over.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532, -345, null, null));
    builder.add(sb.stats(type, -500, -42, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -42, 2));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 3, 42));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    // The interleaved null-only pages are not expected to disturb the detected ordering.
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345, -42, null, null, 2, null, 42, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532, -500, null, null, -42, null, 3, null);
    assertCorrectFiltering(columnIndex, eq(col, 2), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    Set<Integer> set2 = new HashSet<>();
    set2.add(2);
    assertCorrectFiltering(columnIndex, in(col, set2), 5);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7, 8);
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2), 7);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, 2), 1, 2, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    // Scenario 3: nine pages; the non-null pages have non-increasing bounds, again interleaved
    // with null-only pages. Values are passed larger-first here, yet the expected min/max lists
    // below still hold the smaller and larger value of each page respectively.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532, 345));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234, 42, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 42, -2));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3, -42));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532, null, 234, null, 42, null, null, -3);
    assertCorrectValues(columnIndex.getMinValues(), null, 345, null, 42, null, -2, null, null, -42);
    assertCorrectFiltering(columnIndex, eq(col, 2), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    Set<Integer> set3 = new HashSet<>();
    set3.add(2);
    assertCorrectFiltering(columnIndex, in(col, set3), 5);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7, 8);
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, 2), 5, 8);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) IntColumn(org.apache.parquet.filter2.predicate.Operators.IntColumn) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 15 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testReadUsingProjectedSchema.

/**
 * Writes one record using a schema with two required INT32 fields ("a" and "b"), then reads it
 * back through a projected schema that declares only "b" (as optional) and validates that the
 * single group read carries the value 2.
 */
@Test
public void testReadUsingProjectedSchema() {
    final PrimitiveType fieldA = new PrimitiveType(REQUIRED, INT32, "a");
    final PrimitiveType fieldB = new PrimitiveType(REQUIRED, INT32, "b");
    final MessageType writeSchema = new MessageType("schema", fieldA, fieldB);

    // The projection drops "a" and relaxes "b" from REQUIRED to OPTIONAL.
    final PrimitiveType projectedB = new PrimitiveType(OPTIONAL, INT32, "b");
    final MessageType readSchema = new MessageType("schema", projectedB);

    final MemPageStore pageStore = new MemPageStore(1);
    final SimpleGroupFactory factory = new SimpleGroupFactory(writeSchema);
    writeGroups(writeSchema, pageStore, factory.newGroup().append("a", 1).append("b", 2));

    final List<Group> actual = new ArrayList<>(readGroups(pageStore, writeSchema, readSchema, 1));
    final Object[][] expected = { { 2 } };
    validateGroups(actual, expected);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) ArrayList(java.util.ArrayList) List(java.util.List) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType) 123, Test (org.junit.Test) 66, MessageType (org.apache.parquet.schema.MessageType) 41, HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 28, BooleanWritable (org.apache.hadoop.io.BooleanWritable) 28, BytesWritable (org.apache.hadoop.io.BytesWritable) 28, DoubleWritable (org.apache.hadoop.io.DoubleWritable) 28, FloatWritable (org.apache.hadoop.io.FloatWritable) 28, IntWritable (org.apache.hadoop.io.IntWritable) 28, LongWritable (org.apache.hadoop.io.LongWritable) 28, Writable (org.apache.hadoop.io.Writable) 28, GroupType (org.apache.parquet.schema.GroupType) 25, Test (org.testng.annotations.Test) 20, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor) 14, OriginalType (org.apache.parquet.schema.OriginalType) 14, Type (org.apache.parquet.schema.Type) 12, List (java.util.List) 11, ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex) 11, ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder) 11, ArrayList (java.util.ArrayList) 10