Search in sources :

Example 36 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestColumnIndexBuilder method testBuildInt64.

@Test
public void testBuildInt64() {
    PrimitiveType type = Types.required(INT64).named("test_int64");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    // assertThat(builder, instanceOf(LongColumnIndexBuilder.class));
    assertNull(builder.build());
    Operators.LongColumn col = longColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, -4L, 10L));
    builder.add(sb.stats(type, -11L, 7L, null));
    builder.add(sb.stats(type, 2L, 2L, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1L, 2L));
    builder.add(sb.stats(type, -21L, 8L));
    assertEquals(6, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0L, 1L, 2L, 3L, 0L, 0L);
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), 10L, 7L, 2L, null, 2L, 8L);
    assertCorrectValues(columnIndex.getMinValues(), -4L, -11L, 2L, null, 1L, -21L);
    assertCorrectFiltering(columnIndex, eq(col, 0L), 0, 1, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    assertCorrectFiltering(columnIndex, notEq(col, 0L), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2L), 0, 1, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2L), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, -21L));
    assertCorrectFiltering(columnIndex, ltEq(col, -21L), 5);
    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 0, 1, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532L, -345L, null, null));
    builder.add(sb.stats(type, -234L, -42L, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -42L, 2L));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3L, 42L));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345L, -42L, null, null, 2L, null, 42L, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532L, -234L, null, null, -42L, null, -3L, null);
    assertCorrectFiltering(columnIndex, eq(col, -42L), 2, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    assertCorrectFiltering(columnIndex, notEq(col, -42L), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2L), 7);
    assertCorrectFiltering(columnIndex, gtEq(col, 2L), 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, -42L), 1, 2);
    assertCorrectFiltering(columnIndex, ltEq(col, -42L), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532L, 345L));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234L, 42L, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 42L, -2L));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3L, -42L));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532L, null, 234L, null, 42L, null, null, -3L);
    assertCorrectValues(columnIndex.getMinValues(), null, 345L, null, 42L, null, -2L, null, null, -42L);
    assertCorrectFiltering(columnIndex, eq(col, 0L), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    assertCorrectFiltering(columnIndex, notEq(col, 0L), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2L), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2L), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, -42L));
    assertCorrectFiltering(columnIndex, ltEq(col, -42L), 8);
    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : Operators(org.apache.parquet.filter2.predicate.Operators) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) ColumnIndexBuilder(org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.testng.annotations.Test)

Example 37 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestColumnIndexBuilder method testBuildInt32.

@Test
public void testBuildInt32() {
    PrimitiveType type = Types.required(INT32).named("test_int32");
    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    // assertThat(builder, instanceOf(IntColumnIndexBuilder.class));
    assertNull(builder.build());
    Operators.IntColumn col = intColumn("test_col");
    StatsBuilder sb = new StatsBuilder();
    builder.add(sb.stats(type, -4, 10));
    builder.add(sb.stats(type, -11, 7, null));
    builder.add(sb.stats(type, 2, 2, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 1, 2));
    builder.add(sb.stats(type, -21, 8));
    assertEquals(6, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    ColumnIndex columnIndex = builder.build();
    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0, 0);
    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
    assertCorrectValues(columnIndex.getMaxValues(), 10, 7, 2, null, 2, 8);
    assertCorrectValues(columnIndex.getMinValues(), -4, -11, 2, null, 1, -21);
    assertCorrectFiltering(columnIndex, eq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, lt(col, 2), 0, 1, 4, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 0, 1, 2, 4, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 0, 1, 5);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -532, -345, null, null));
    builder.add(sb.stats(type, -500, -42, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, -42, 2));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 3, 42));
    builder.add(sb.stats(type, null, null));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
    assertCorrectValues(columnIndex.getMaxValues(), null, -345, -42, null, null, 2, null, 42, null);
    assertCorrectValues(columnIndex.getMinValues(), null, -532, -500, null, null, -42, null, 3, null);
    assertCorrectFiltering(columnIndex, eq(col, 2), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, gt(col, 2), 7);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 5, 7);
    assertCorrectFiltering(columnIndex, lt(col, 2), 1, 2, 5);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 1, 2, 5);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
    sb = new StatsBuilder();
    builder.add(sb.stats(type, null, null, null, null, null));
    builder.add(sb.stats(type, 532, 345));
    builder.add(sb.stats(type, null, null, null));
    builder.add(sb.stats(type, 234, 42, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, 42, -2));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, null, null));
    builder.add(sb.stats(type, -3, -42));
    assertEquals(9, builder.getPageCount());
    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
    columnIndex = builder.build();
    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
    assertCorrectValues(columnIndex.getMaxValues(), null, 532, null, 234, null, 42, null, null, -3);
    assertCorrectValues(columnIndex.getMinValues(), null, 345, null, 42, null, -2, null, null, -42);
    assertCorrectFiltering(columnIndex, eq(col, 2), 5);
    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
    assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, gt(col, 2), 1, 3, 5);
    assertCorrectFiltering(columnIndex, gtEq(col, 2), 1, 3, 5);
    assertCorrectFiltering(columnIndex, lt(col, 2), 5, 8);
    assertCorrectFiltering(columnIndex, ltEq(col, 2), 5, 8);
    assertCorrectFiltering(columnIndex, userDefined(col, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
    assertCorrectFiltering(columnIndex, invert(userDefined(col, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
}
Also used : Operators(org.apache.parquet.filter2.predicate.Operators) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) ColumnIndexBuilder(org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.testng.annotations.Test)

Example 38 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestParquetPredicateUtils method testParquetTupleDomainPrimitive.

@Test
public void testParquetTupleDomainPrimitive() {
    HiveColumnHandle columnHandle = new HiveColumnHandle("my_primitive", HiveType.valueOf("bigint"), parseTypeSignature(StandardTypes.BIGINT), 0, REGULAR, Optional.empty(), Optional.empty());
    Domain singleValueDomain = Domain.singleValue(BIGINT, 123L);
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, singleValueDomain));
    MessageType fileSchema = new MessageType("hive_schema", new PrimitiveType(OPTIONAL, INT64, "my_primitive"));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    assertEquals(tupleDomain.getDomains().get().size(), 1);
    ColumnDescriptor descriptor = tupleDomain.getDomains().get().keySet().iterator().next();
    assertEquals(descriptor.getPath().length, 1);
    assertEquals(descriptor.getPath()[0], "my_primitive");
    Domain predicateDomain = Iterables.getOnlyElement(tupleDomain.getDomains().get().values());
    assertEquals(predicateDomain, singleValueDomain);
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ParquetPageSourceFactory.getParquetTupleDomain(com.facebook.presto.hive.parquet.ParquetPageSourceFactory.getParquetTupleDomain) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Example 39 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class TestParquetPredicateUtils method testParquetTupleDomainStructArray.

@Test
public void testParquetTupleDomainStructArray() {
    HiveColumnHandle columnHandle = new HiveColumnHandle("my_array_struct", HiveType.valueOf("array<struct<a:int>>"), parseTypeSignature(StandardTypes.ARRAY), 0, REGULAR, Optional.empty(), Optional.empty());
    RowType.Field rowField = new RowType.Field(Optional.of("a"), INTEGER);
    RowType rowType = RowType.from(ImmutableList.of(rowField));
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(rowType))));
    MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "my_array_struct", new GroupType(REPEATED, "bag", new GroupType(OPTIONAL, "array_element", new PrimitiveType(OPTIONAL, INT32, "a")))));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RowType(com.facebook.presto.common.type.RowType) ArrayType(com.facebook.presto.common.type.ArrayType) GroupType(org.apache.parquet.schema.GroupType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Example 40 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project presto by prestodb.

the class AbstractTestParquetReader method testNullableNullCount.

@Test
public void testNullableNullCount() {
    PrimitiveType primitiveType = new PrimitiveType(OPTIONAL, BINARY, "testColumn");
    Statistics statistics = new Statistics();
    assertEquals(MetadataReader.readStats(statistics, primitiveType.getPrimitiveTypeName()).getNumNulls(), -1);
    statistics.setNull_count(10);
    assertEquals(MetadataReader.readStats(statistics, primitiveType.getPrimitiveTypeName()).getNumNulls(), 10);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) Statistics(org.apache.parquet.format.Statistics) Test(org.testng.annotations.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType)123 Test (org.junit.Test)66 MessageType (org.apache.parquet.schema.MessageType)41 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)28 BooleanWritable (org.apache.hadoop.io.BooleanWritable)28 BytesWritable (org.apache.hadoop.io.BytesWritable)28 DoubleWritable (org.apache.hadoop.io.DoubleWritable)28 FloatWritable (org.apache.hadoop.io.FloatWritable)28 IntWritable (org.apache.hadoop.io.IntWritable)28 LongWritable (org.apache.hadoop.io.LongWritable)28 Writable (org.apache.hadoop.io.Writable)28 GroupType (org.apache.parquet.schema.GroupType)25 Test (org.testng.annotations.Test)20 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)14 OriginalType (org.apache.parquet.schema.OriginalType)14 Type (org.apache.parquet.schema.Type)12 List (java.util.List)11 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)11 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)11 ArrayList (java.util.ArrayList)10