Search in sources:

Example 6 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestPigSchemaConverter method testSchemaEvolution.

/**
 * Merges two "pig.schema" strings that share some fields (b, c) and checks the
 * resulting Pig schema: the expected string lists every field once, with the
 * fields of the first schema string first and the new fields of the second after.
 */
@Test
public void testSchemaEvolution() {
    // LinkedHashSet keeps the two schema strings in the order they are listed here.
    Set<String> pigSchemas = new LinkedHashSet<String>(Arrays.asList("a:int, b:int, c:int, d:int, e:int, f:int", "aa:int, aaa:int, b:int, c:int, ee:int"));
    Map<String, Set<String>> schemasByKey = new LinkedHashMap<String, Set<String>>();
    schemasByKey.put("pig.schema", pigSchemas);

    // The file schema itself only declares the single optional INT32 column "a".
    MessageType fileSchema = new MessageType("file_schema", new PrimitiveType(OPTIONAL, INT32, "a"));
    Schema merged = getPigSchemaFromMultipleFiles(fileSchema, schemasByKey);

    assertEquals("a: int,b: int,c: int,d: int,e: int,f: int,aa: int,aaa: int,ee: int", pigSchemaToString(merged));
}
Also used : Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) PrimitiveType(org.apache.parquet.schema.PrimitiveType) PigSchemaConverter.pigSchemaToString(org.apache.parquet.pig.PigSchemaConverter.pigSchemaToString) MessageType(org.apache.parquet.schema.MessageType) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 7 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildBinaryUtf8.

/**
 * Exercises the column index builder for a required BINARY column annotated as UTF8.
 *
 * Three indexes of eight pages each are built, with expected boundary order
 * UNORDERED, ASCENDING and DESCENDING respectively. For each index the test checks
 * the page count, min/max size, null counts, null pages, min/max values and the
 * result of filtering with eq, in, notIn, notEq, gt, gtEq, lt, ltEq and a
 * user-defined predicate (plain and inverted).
 */
@Test
public void testBuildBinaryUtf8() {
    PrimitiveType type = Types.required(BINARY).as(UTF8).named("test_binary_utf8");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    assertThat(builder, instanceOf(BinaryColumnIndexBuilder.class));
    // Nothing has been added yet, so build() is expected to return null.
    assertNull(builder.build());
    BinaryColumn col = binaryColumn("test_col");

    // --- Case 1: eight pages, expected boundary order UNORDERED ---
    // Each builder.add(...) contributes one page; the number of nulls passed to
    // stats(...) per page matches the null counts asserted further down.
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, stringBinary("Jeltz"), stringBinary("Slartibartfast"), null, null));
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, stringBinary("Beeblebrox"), stringBinary("Prefect")));
    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Trilian"), null));
    builder.add(sb.stats(type, stringBinary("Beeblebrox")));
    builder.add(sb.stats(type, null, null));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 5, 2, 0, 1, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, true, true, false, false, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, stringBinary("Slartibartfast"), null, null, stringBinary("Prefect"), stringBinary("Trilian"), stringBinary("Beeblebrox"), null);
    assertCorrectValues(columnIndex.getMinValues(), null, stringBinary("Jeltz"), null, null, stringBinary("Beeblebrox"), stringBinary("Dent"), stringBinary("Beeblebrox"), null);
    // NOTE(review): the trailing ints are presumably the indexes of the pages the
    // predicate is expected to keep - confirm against assertCorrectFiltering.
    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Marvin")), 1, 4, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 5, 7);
    Set<Binary> set1 = new HashSet<>();
    set1.add(stringBinary("Marvin"));
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 4, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 6, 7);
    // Same set again, now also containing null.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 6);
    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Beeblebrox")), 0, 1, 2, 3, 4, 5, 7);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5, 6);
    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Prefect")), 1, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Dent")), 4, 6);
    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Dent")), 4, 5, 6);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 4, 6);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 7);

    // --- Case 2: eight pages, expected boundary order ASCENDING ---
    // A fresh builder and a fresh StatsBuilder are used so sizes start from zero again.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, stringBinary("Beeblebrox"), stringBinary("Dent"), null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Jeltz")));
    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Prefect"), null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, stringBinary("Slartibartfast")));
    builder.add(sb.stats(type, null, null));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 5, 0, 1, 2, 0, 2);
    assertCorrectNullPages(columnIndex, false, true, true, false, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), stringBinary("Dent"), null, null, stringBinary("Jeltz"), stringBinary("Prefect"), null, stringBinary("Slartibartfast"), null);
    assertCorrectValues(columnIndex.getMinValues(), stringBinary("Beeblebrox"), null, null, stringBinary("Dent"), stringBinary("Dent"), null, stringBinary("Slartibartfast"), null);
    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Jeltz")), 3, 4);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 4, 5, 7);
    Set<Binary> set2 = new HashSet<>();
    set2.add(stringBinary("Jeltz"));
    assertCorrectFiltering(columnIndex, in(col, set2), 3, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 5, 6, 7);
    // Same set again, now also containing null.
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 6);
    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Slartibartfast")), 0, 1, 2, 3, 4, 5, 7);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 3, 4, 6);
    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Marvin")), 4, 6);
    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Marvin")), 4, 6);
    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Dent")), 0);
    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Dent")), 0, 3, 4);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 0);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 6, 7);

    // --- Case 3: eight pages, expected boundary order DESCENDING ---
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, stringBinary("Slartibartfast")));
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, stringBinary("Prefect"), stringBinary("Jeltz"), null));
    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Dent")));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Beeblebrox"), null, null));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 0, 5, 1, 0, 2, 2, 2);
    assertCorrectNullPages(columnIndex, true, false, true, false, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, stringBinary("Slartibartfast"), null, stringBinary("Prefect"), stringBinary("Dent"), null, null, stringBinary("Dent"));
    assertCorrectValues(columnIndex.getMinValues(), null, stringBinary("Slartibartfast"), null, stringBinary("Jeltz"), stringBinary("Dent"), null, null, stringBinary("Beeblebrox"));
    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Marvin")), 3);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 5, 6, 7);
    Set<Binary> set3 = new HashSet<>();
    set3.add(stringBinary("Marvin"));
    assertCorrectFiltering(columnIndex, in(col, set3), 3);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7);
    // Same set again, now also containing null.
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 4);
    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 1, 2, 3, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 4, 7);
    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1);
    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Prefect")), 1, 3);
    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Marvin")), 3, 4, 7);
    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Marvin")), 3, 4, 7);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 6, 7);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Binary(org.apache.parquet.io.api.Binary) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildFloat.

/**
 * Exercises the column index builder for a required FLOAT column.
 *
 * Three indexes are built: six pages with expected boundary order UNORDERED, then
 * nine pages each for ASCENDING and DESCENDING. For each index the test checks the
 * page count, min/max size, null counts, null pages, min/max values and the result
 * of filtering with eq, in, notIn, notEq, gt, gtEq, lt, ltEq and a user-defined
 * predicate (plain and inverted).
 */
@Test
public void testBuildFloat() {
    PrimitiveType type = Types.required(FLOAT).named("test_float");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    assertThat(builder, instanceOf(FloatColumnIndexBuilder.class));
    // Nothing has been added yet, so build() is expected to return null.
    assertNull(builder.build());
    FloatColumn col = floatColumn("test_col");

    // --- Case 1: six pages, expected boundary order UNORDERED ---
    // Each builder.add(...) contributes one page; the number of nulls passed to
    // stats(...) per page matches the null counts asserted further down.
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, -4.2f, -4.1f));
    builder.add(sb.stats(type, -11.7f, 7.0f, null));
    builder.add(sb.stats(type, 2.2f, 2.2f, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1.9f, 2.32f));
    builder.add(sb.stats(type, -21.0f, 8.1f));
    assertEquals(6, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), -4.1f, 7.0f, 2.2f, null, 2.32f, 8.1f);
    assertCorrectValues(columnIndex.getMinValues(), -4.2f, -11.7f, 2.2f, null, 1.9f, -21.0f);
    // NOTE(review): the trailing ints are presumably the indexes of the pages the
    // predicate is expected to keep - confirm against assertCorrectFiltering.
    assertCorrectFiltering(columnIndex, eq(col, 0.0f), 1, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    Set<Float> set1 = new HashSet<>();
    set1.add(0.0f);
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 4);
    // Same set again, now also containing null.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 5);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 4);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 4, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.2f), 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, 0.0f), 0, 1, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 1.9f), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 4, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5);

    // --- Case 2: nine pages, expected boundary order ASCENDING ---
    // A fresh builder and a fresh StatsBuilder are used so sizes start from zero again.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532.3f, -345.2f, null, null));
    builder.add(sb.stats(type, -300.6f, -234.7f, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -234.6f, 2.99999f));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 3.0f, 42.83f));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345.2f, -234.7f, null, null, 2.99999f, null, 42.83f, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532.3f, -300.6f, null, null, -234.6f, null, 3.0f, null);
    assertCorrectFiltering(columnIndex, eq(col, 0.0f), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    Set<Float> set2 = new HashSet<>();
    set2.add(0.0f);
    assertCorrectFiltering(columnIndex, in(col, set2), 5);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7, 8);
    // Same set again, now also containing null.
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 5, 7);
    assertCorrectFiltering(columnIndex, gtEq(col, -234.7f), 2, 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, -234.6f), 1, 2);
    assertCorrectFiltering(columnIndex, ltEq(col, -234.6f), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);

    // --- Case 3: nine pages, expected boundary order DESCENDING ---
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532.3f, 345.2f));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234.7f, 234.6f, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 234.6f, -2.99999f));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3.0f, -42.83f));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532.3f, null, 234.7f, null, 234.6f, null, null, -3.0f);
    assertCorrectValues(columnIndex.getMinValues(), null, 345.2f, null, 234.6f, null, -2.99999f, null, null, -42.83f);
    assertCorrectFiltering(columnIndex, eq(col, 234.65f), 3);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    Set<Float> set3 = new HashSet<>();
    set3.add(234.65f);
    assertCorrectFiltering(columnIndex, in(col, set3), 3);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7, 8);
    // Same set again, now also containing null.
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 5, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2.2f), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, 0.0f), 5, 8);
    assertCorrectFiltering(columnIndex, ltEq(col, 0.0f), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, FloatIsInteger.class), 1, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) FloatColumn(org.apache.parquet.filter2.predicate.Operators.FloatColumn) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildFloatZeroNaN.

/**
 * Covers two special float cases for the column index builder: pages whose
 * boundaries are signed zeros, and a page that contains NaN.
 *
 * In the zero scenario the expected minimum of the second page is {@code -0.0f}
 * and the expected maximum of the first page is {@code 0.0f}. In the NaN scenario
 * {@code build()} is expected to return null.
 */
@Test
public void testBuildFloatZeroNaN() {
    PrimitiveType floatType = Types.required(FLOAT).named("test_float");

    // Scenario 1: zero-valued page boundaries.
    ColumnIndexBuilder zeroBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
    StatsBuilder stats = new StatsBuilder();
    zeroBuilder.add(stats.stats(floatType, -1.0f, -0.0f));
    zeroBuilder.add(stats.stats(floatType, 0.0f, 1.0f));
    zeroBuilder.add(stats.stats(floatType, 1.0f, 100.0f));
    ColumnIndex zeroIndex = zeroBuilder.build();
    assertCorrectValues(zeroIndex.getMinValues(), -1.0f, -0.0f, 1.0f);
    assertCorrectValues(zeroIndex.getMaxValues(), 0.0f, 1.0f, 100.0f);

    // Scenario 2: same pages, but the second one carries NaN; no index is expected.
    ColumnIndexBuilder nanBuilder = ColumnIndexBuilder.getBuilder(floatType, Integer.MAX_VALUE);
    nanBuilder.add(stats.stats(floatType, -1.0f, -0.0f));
    nanBuilder.add(stats.stats(floatType, 0.0f, Float.NaN));
    nanBuilder.add(stats.stats(floatType, 1.0f, 100.0f));
    assertNull(nanBuilder.build());
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.junit.Test)

Example 10 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestColumnIndexBuilder method testBuildBinaryDecimal.

/**
 * Exercises the column index builder for a required BINARY column annotated as
 * DECIMAL with precision 12 and scale 2.
 *
 * Three indexes of eight pages each are built, with expected boundary order
 * UNORDERED, ASCENDING and DESCENDING respectively. For each index the test checks
 * the page count, min/max size, null counts, null pages, min/max values and the
 * result of filtering with eq, in, notIn, notEq, gt, gtEq, lt, ltEq and a
 * user-defined predicate (plain and inverted).
 */
@Test
public void testBuildBinaryDecimal() {
    PrimitiveType type = Types.required(BINARY).as(DECIMAL).precision(12).scale(2).named("test_binary_decimal");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    assertThat(builder, instanceOf(BinaryColumnIndexBuilder.class));
    // Nothing has been added yet, so build() is expected to return null.
    assertNull(builder.build());
    BinaryColumn col = binaryColumn("test_col");

    // --- Case 1: eight pages, expected boundary order UNORDERED ---
    // Each builder.add(...) contributes one page; the number of nulls passed to
    // stats(...) per page matches the null counts asserted further down.
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("-0.17"), decimalBinary("1234567890.12")));
    builder.add(sb.stats(type, decimalBinary("-234.23"), null, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, decimalBinary("-9999293.23"), decimalBinary("2348978.45")));
    builder.add(sb.stats(type, null, null, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("87656273")));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 0, 3, 3, 0, 4, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, decimalBinary("1234567890.12"), decimalBinary("-234.23"), null, decimalBinary("2348978.45"), null, null, decimalBinary("87656273"));
    assertCorrectValues(columnIndex.getMinValues(), null, decimalBinary("-0.17"), decimalBinary("-234.23"), null, decimalBinary("-9999293.23"), null, null, decimalBinary("87656273"));
    // NOTE(review): the trailing ints are presumably the indexes of the pages the
    // predicate is expected to keep - confirm against assertCorrectFiltering.
    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("0.0")), 1, 4);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 5, 6);
    Set<Binary> set1 = new HashSet<>();
    // Built with decimalBinary like every other probe value in this test, so the
    // set holds the same zero as the eq check above. Binary.fromString("0.0") would
    // instead store the UTF-8 bytes of the text "0.0".
    set1.add(decimalBinary("0.0"));
    assertCorrectFiltering(columnIndex, in(col, set1), 1, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 5, 6, 7);
    // Same set again, now also containing null.
    set1.add(null);
    assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(columnIndex, notIn(col, set1), 7);
    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 7);
    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("2348978.45")), 1);
    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("2348978.45")), 1, 4);
    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-234.23")), 4);
    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-234.23")), 2, 4);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 1, 2, 4, 7);

    // --- Case 2: eight pages, expected boundary order ASCENDING ---
    // A fresh builder and a fresh StatsBuilder are used so sizes start from zero again.
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null));
    builder.add(sb.stats(type, decimalBinary("-9999293.23"), decimalBinary("-234.23")));
    builder.add(sb.stats(type, decimalBinary("-0.17"), decimalBinary("87656273")));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("87656273")));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("1234567890.12"), null, null, null));
    builder.add(sb.stats(type, null, null, null));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 4, 0, 0, 2, 0, 2, 3, 3);
    assertCorrectNullPages(columnIndex, true, false, false, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, decimalBinary("-234.23"), decimalBinary("87656273"), null, decimalBinary("87656273"), null, decimalBinary("1234567890.12"), null);
    assertCorrectValues(columnIndex.getMinValues(), null, decimalBinary("-9999293.23"), decimalBinary("-0.17"), null, decimalBinary("87656273"), null, decimalBinary("1234567890.12"), null);
    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("87656273")), 2, 4);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 3, 5, 6, 7);
    Set<Binary> set2 = new HashSet<>();
    set2.add(decimalBinary("87656273"));
    assertCorrectFiltering(columnIndex, in(col, set2), 2, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 5, 6, 7);
    // Same set again, now also containing null.
    set2.add(null);
    assertCorrectFiltering(columnIndex, in(col, set2), 0, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notIn(col, set2), 1);
    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 0, 1, 2, 3, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 6);
    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("87656273")), 6);
    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("87656273")), 2, 4, 6);
    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-0.17")), 1);
    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-0.17")), 1, 2);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 2, 3, 5, 6, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 1, 2, 4, 6);

    // --- Case 3: eight pages, expected boundary order DESCENDING ---
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("1234567890.12"), null, null, null));
    builder.add(sb.stats(type, null, null, null, null));
    builder.add(sb.stats(type, decimalBinary("1234567890.12"), decimalBinary("87656273")));
    builder.add(sb.stats(type, decimalBinary("987656273"), decimalBinary("-0.17")));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, decimalBinary("-234.23"), decimalBinary("-9999293.23")));
    assertEquals(8, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 3, 2, 3, 4, 0, 0, 2, 0);
    assertCorrectNullPages(columnIndex, true, true, false, true, false, false, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, null, decimalBinary("1234567890.12"), null, decimalBinary("1234567890.12"), decimalBinary("987656273"), null, decimalBinary("-234.23"));
    assertCorrectValues(columnIndex.getMinValues(), null, null, decimalBinary("1234567890.12"), null, decimalBinary("87656273"), decimalBinary("-0.17"), null, decimalBinary("-9999293.23"));
    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("1234567890.12")), 2, 4);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 6);
    Set<Binary> set3 = new HashSet<>();
    set3.add(decimalBinary("1234567890.12"));
    assertCorrectFiltering(columnIndex, in(col, set3), 2, 4);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 3, 5, 6, 7);
    // Same set again, now also containing null.
    set3.add(null);
    assertCorrectFiltering(columnIndex, in(col, set3), 0, 1, 2, 3, 4, 6);
    assertCorrectFiltering(columnIndex, notIn(col, set3), 5, 7);
    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("0.0")), 0, 1, 2, 3, 4, 5, 6, 7);
    assertCorrectFiltering(columnIndex, notEq(col, null), 2, 4, 5, 7);
    // No page indexes are listed here: no page is expected to match.
    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("1234567890.12")));
    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("1234567890.12")), 2, 4);
    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-0.17")), 7);
    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-0.17")), 5, 7);
    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 5, 6);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 2, 4, 5, 7);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Binary(org.apache.parquet.io.api.Binary) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType) 123, Test (org.junit.Test) 66, MessageType (org.apache.parquet.schema.MessageType) 41, HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 28, BooleanWritable (org.apache.hadoop.io.BooleanWritable) 28, BytesWritable (org.apache.hadoop.io.BytesWritable) 28, DoubleWritable (org.apache.hadoop.io.DoubleWritable) 28, FloatWritable (org.apache.hadoop.io.FloatWritable) 28, IntWritable (org.apache.hadoop.io.IntWritable) 28, LongWritable (org.apache.hadoop.io.LongWritable) 28, Writable (org.apache.hadoop.io.Writable) 28, GroupType (org.apache.parquet.schema.GroupType) 25, Test (org.testng.annotations.Test) 20, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor) 14, OriginalType (org.apache.parquet.schema.OriginalType) 14, Type (org.apache.parquet.schema.Type) 12, List (java.util.List) 11, ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex) 11, ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder) 11, ArrayList (java.util.ArrayList) 10