Search in sources :

Example 36 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testSmallint.

/**
 * Checks the domains that {@code getDomain} produces for a SMALLINT column whose descriptor is
 * created with INT32, across several statistics inputs, and that statistics whose minimum is
 * larger than their maximum are reported as a {@link ParquetCorruptionException}.
 */
@Test
public void testSmallint() throws ParquetCorruptionException {
    // 2147483648 == Integer.MAX_VALUE + 1
    final long aboveIntMax = 2147483648L;
    ColumnDescriptor smallintColumn = createColumnDescriptor(INT32, "SmallintColumn");

    // Null statistics with a row count of 0 -> Domain.all
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 0, null, ID),
            Domain.all(SMALLINT));

    // min == max -> single value
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 10, longColumnStats(100, 100), ID),
            singleValue(SMALLINT, 100L));

    // min < max -> inclusive range [0, 100], nulls not allowed
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(SMALLINT, 0L, true, 100L, true)), false));

    // max of 2147483648 -> expected domain is notNull
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 20, longColumnStats(0, aboveIntMax), ID),
            notNull(SMALLINT));

    // longOnlyNullsStats(10) -> all values with nulls allowed
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(SMALLINT), true));

    // fail on corrupted statistics (min greater than max)
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(smallintColumn, SMALLINT, 10, longColumnStats(aboveIntMax, 10), ID))
            .withMessage("Corrupted statistics for column \"[] required int32 SmallintColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]");
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Test(org.testng.annotations.Test)

Example 37 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testInteger.

/**
 * Checks the domains that {@code getDomain} produces for an INTEGER column whose descriptor is
 * created with INT32, across several statistics inputs, and that statistics whose minimum is
 * larger than their maximum are reported as a {@link ParquetCorruptionException}.
 */
@Test
public void testInteger() throws ParquetCorruptionException {
    // 2147483648 == Integer.MAX_VALUE + 1
    final long aboveIntMax = 2147483648L;
    ColumnDescriptor integerColumn = createColumnDescriptor(INT32, "IntegerColumn");

    // Null statistics with a row count of 0 -> Domain.all
    assertEquals(
            getDomain(integerColumn, INTEGER, 0, null, ID),
            Domain.all(INTEGER));

    // min == max -> single value
    assertEquals(
            getDomain(integerColumn, INTEGER, 10, longColumnStats(100, 100), ID),
            singleValue(INTEGER, 100L));

    // min < max -> inclusive range [0, 100], nulls not allowed
    assertEquals(
            getDomain(integerColumn, INTEGER, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(INTEGER, 0L, true, 100L, true)), false));

    // max of 2147483648 -> expected domain is notNull
    assertEquals(
            getDomain(integerColumn, INTEGER, 20, longColumnStats(0, aboveIntMax), ID),
            notNull(INTEGER));

    // longOnlyNullsStats(10) -> all values with nulls allowed
    assertEquals(
            getDomain(integerColumn, INTEGER, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(INTEGER), true));

    // fail on corrupted statistics (min greater than max)
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(integerColumn, INTEGER, 10, longColumnStats(aboveIntMax, 10), ID))
            .withMessage("Corrupted statistics for column \"[] required int32 IntegerColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]");
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Test(org.testng.annotations.Test)

Example 38 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testVarcharMatchesWithDictionaryDescriptor.

/**
 * Builds a predicate over a single optional BINARY column (read as {@code varchar(255)}) from
 * {@code getEffectivePredicate} with {@code EMPTY_SLICE}, and asserts that it matches a
 * {@link DictionaryDescriptor} holding a one-entry PLAIN_DICTIONARY page backed by four zero bytes.
 */
@Test
public void testVarcharMatchesWithDictionaryDescriptor() {
    // Column under test: a plain descriptor wrapped into a RichColumnDescriptor.
    ColumnDescriptor baseDescriptor = new ColumnDescriptor(
            new String[] { "path" },
            new PrimitiveType(OPTIONAL, BINARY, 0, ""),
            0,
            0);
    RichColumnDescriptor richColumn = new RichColumnDescriptor(
            baseDescriptor,
            new PrimitiveType(OPTIONAL, BINARY, "Test column"));

    // Predicate built from the effective predicate for that column.
    TupleDomain<ColumnDescriptor> predicateDomain = getEffectivePredicate(richColumn, createVarcharType(255), EMPTY_SLICE);
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(predicateDomain, singletonList(richColumn));

    // Dictionary page declared with one entry, backed by four zero bytes.
    byte[] dictionaryBytes = new byte[] { 0, 0, 0, 0 };
    DictionaryPage dictionaryPage = new DictionaryPage(Slices.wrappedBuffer(dictionaryBytes), 1, PLAIN_DICTIONARY);
    DictionaryDescriptor dictionary = new DictionaryDescriptor(richColumn, Optional.of(dictionaryPage));

    assertTrue(predicate.matches(dictionary));
}
Also used : DictionaryDescriptor(com.facebook.presto.parquet.predicate.DictionaryDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) TupleDomainParquetPredicate(com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Test(org.testng.annotations.Test)

Example 39 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testDate.

/**
 * Checks the domains that {@code getDomain} produces for a DATE column whose descriptor is
 * created with INT32, and that statistics whose minimum is larger than their maximum are
 * reported as a {@link ParquetCorruptionException}.
 */
@Test
public void testDate() throws ParquetCorruptionException {
    ColumnDescriptor dateColumn = createColumnDescriptor(INT32, "DateColumn");

    // Null statistics with a row count of 0 -> Domain.all
    assertEquals(
            getDomain(dateColumn, DATE, 0, null, ID),
            Domain.all(DATE));

    // min == max -> single value
    assertEquals(
            getDomain(dateColumn, DATE, 10, intColumnStats(100, 100), ID),
            singleValue(DATE, 100L));

    // min < max -> inclusive range [0, 100], nulls not allowed
    assertEquals(
            getDomain(dateColumn, DATE, 10, intColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(DATE, 0L, true, 100L, true)), false));

    // fail on corrupted statistics (min greater than max)
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(dateColumn, DATE, 10, intColumnStats(200, 100), ID))
            .withMessage("Corrupted statistics for column \"[] required int32 DateColumn\" in Parquet file \"testFile\": [min: 200, max: 100, num_nulls: 0]");
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Test(org.testng.annotations.Test)

Example 40 with ColumnDescriptor

use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.

the class TestTupleDomainParquetPredicate method testTinyint.

/**
 * Checks the domains that {@code getDomain} produces for a TINYINT column whose descriptor is
 * created with INT32, across several statistics inputs, and that statistics whose minimum is
 * larger than their maximum are reported as a {@link ParquetCorruptionException}.
 */
@Test
public void testTinyint() throws ParquetCorruptionException {
    // 2147483648 == Integer.MAX_VALUE + 1
    final long aboveIntMax = 2147483648L;
    ColumnDescriptor tinyintColumn = createColumnDescriptor(INT32, "TinyintColumn");

    // Null statistics with a row count of 0 -> Domain.all
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 0, null, ID),
            Domain.all(TINYINT));

    // min == max -> single value
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 10, longColumnStats(100, 100), ID),
            singleValue(TINYINT, 100L));

    // min < max -> inclusive range [0, 100], nulls not allowed
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(TINYINT, 0L, true, 100L, true)), false));

    // max of 2147483648 -> expected domain is notNull
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 20, longColumnStats(0, aboveIntMax), ID),
            notNull(TINYINT));

    // longOnlyNullsStats(10) -> all values with nulls allowed
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(TINYINT), true));

    // fail on corrupted statistics (min greater than max)
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(tinyintColumn, TINYINT, 10, longColumnStats(aboveIntMax, 10), ID))
            .withMessage("Corrupted statistics for column \"[] required int32 TinyintColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]");
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Test(org.testng.annotations.Test)

Aggregations

ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor) 88, MessageType (org.apache.parquet.schema.MessageType) 33, PrimitiveType (org.apache.parquet.schema.PrimitiveType) 18, Test (org.testng.annotations.Test) 18, RichColumnDescriptor (com.facebook.presto.parquet.RichColumnDescriptor) 16, ArrayList (java.util.ArrayList) 16, GroupType (org.apache.parquet.schema.GroupType) 14, BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData) 12, Test (org.junit.Test) 12, Domain (com.facebook.presto.common.predicate.Domain) 11, TupleDomain (com.facebook.presto.common.predicate.TupleDomain) 11, Path (org.apache.hadoop.fs.Path) 11, ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) 11, List (java.util.List) 10, ImmutableList (com.google.common.collect.ImmutableList) 9, HashMap (java.util.HashMap) 9, Configuration (org.apache.hadoop.conf.Configuration) 9, Type (org.apache.parquet.schema.Type) 9, HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle) 8, IOException (java.io.IOException) 7