Use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testSmallint.
/**
 * Checks the domains that {@code getDomain} returns for an INT32 column read as SMALLINT,
 * and that statistics whose min exceeds max are rejected as corrupted.
 */
@Test
public void testSmallint()
        throws ParquetCorruptionException
{
    final ColumnDescriptor smallintColumn = createColumnDescriptor(INT32, "SmallintColumn");

    // null statistics: expect the unconstrained domain
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 0, null, ID),
            Domain.all(SMALLINT));

    // min == max: expect a single-value domain
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 10, longColumnStats(100, 100), ID),
            singleValue(SMALLINT, 100L));

    // min < max: expect the closed range [0, 100]
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(SMALLINT, 0L, true, 100L, true)), false));

    // max of 2147483648 is one past Integer.MAX_VALUE: expect notNull rather than a range
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 20, longColumnStats(0, 2147483648L), ID),
            notNull(SMALLINT));

    // statistics from longOnlyNullsStats: expect all values, with create(...)'s flag set to true
    assertEquals(
            getDomain(smallintColumn, SMALLINT, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(SMALLINT), true));

    // min (2147483648) above max (10) must surface as a corruption error with this exact message
    final String corruptionMessage = "Corrupted statistics for column \"[] required int32 SmallintColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]";
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(smallintColumn, SMALLINT, 10, longColumnStats(2147483648L, 10), ID))
            .withMessage(corruptionMessage);
}
Use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testInteger.
/**
 * Checks the domains that {@code getDomain} returns for an INT32 column read as INTEGER,
 * and that statistics whose min exceeds max are rejected as corrupted.
 */
@Test
public void testInteger()
        throws ParquetCorruptionException
{
    final ColumnDescriptor integerColumn = createColumnDescriptor(INT32, "IntegerColumn");

    // null statistics: expect the unconstrained domain
    assertEquals(
            getDomain(integerColumn, INTEGER, 0, null, ID),
            Domain.all(INTEGER));

    // min == max: expect a single-value domain
    assertEquals(
            getDomain(integerColumn, INTEGER, 10, longColumnStats(100, 100), ID),
            singleValue(INTEGER, 100L));

    // min < max: expect the closed range [0, 100]
    assertEquals(
            getDomain(integerColumn, INTEGER, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(INTEGER, 0L, true, 100L, true)), false));

    // max of 2147483648 is one past Integer.MAX_VALUE: expect notNull rather than a range
    assertEquals(
            getDomain(integerColumn, INTEGER, 20, longColumnStats(0, 2147483648L), ID),
            notNull(INTEGER));

    // statistics from longOnlyNullsStats: expect all values, with create(...)'s flag set to true
    assertEquals(
            getDomain(integerColumn, INTEGER, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(INTEGER), true));

    // min (2147483648) above max (10) must surface as a corruption error with this exact message
    final String corruptionMessage = "Corrupted statistics for column \"[] required int32 IntegerColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]";
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(integerColumn, INTEGER, 10, longColumnStats(2147483648L, 10), ID))
            .withMessage(corruptionMessage);
}
Use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testVarcharMatchesWithDictionaryDescriptor.
/**
 * Checks that a predicate built for a VARCHAR(255) column from {@code EMPTY_SLICE}
 * matches a {@code DictionaryDescriptor} that carries a dictionary page for that column.
 */
@Test
public void testVarcharMatchesWithDictionaryDescriptor()
{
    // Plain Parquet descriptor for an optional BINARY column at path ["path"]
    String[] columnPath = new String[] {"path"};
    PrimitiveType unnamedBinaryType = new PrimitiveType(OPTIONAL, BINARY, 0, "");
    ColumnDescriptor baseDescriptor = new ColumnDescriptor(columnPath, unnamedBinaryType, 0, 0);

    // Wrap it together with a named primitive type
    PrimitiveType namedBinaryType = new PrimitiveType(OPTIONAL, BINARY, "Test column");
    RichColumnDescriptor richColumn = new RichColumnDescriptor(baseDescriptor, namedBinaryType);

    // Predicate under test, built from the column, VARCHAR(255) and EMPTY_SLICE
    TupleDomain<ColumnDescriptor> predicateDomain = getEffectivePredicate(richColumn, createVarcharType(255), EMPTY_SLICE);
    TupleDomainParquetPredicate predicate = new TupleDomainParquetPredicate(predicateDomain, singletonList(richColumn));

    // Dictionary page over four zero bytes, declared with a size of 1
    // NOTE(review): presumably this encodes a single empty string entry; confirm against the dictionary reader
    byte[] dictionaryBytes = new byte[] {0, 0, 0, 0};
    DictionaryPage dictionaryPage = new DictionaryPage(Slices.wrappedBuffer(dictionaryBytes), 1, PLAIN_DICTIONARY);

    DictionaryDescriptor dictionaryDescriptor = new DictionaryDescriptor(richColumn, Optional.of(dictionaryPage));
    assertTrue(predicate.matches(dictionaryDescriptor));
}
Use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testDate.
/**
 * Checks the domains that {@code getDomain} returns for an INT32 column read as DATE,
 * and that statistics whose min exceeds max are rejected as corrupted.
 */
@Test
public void testDate()
        throws ParquetCorruptionException
{
    final ColumnDescriptor dateColumn = createColumnDescriptor(INT32, "DateColumn");

    // null statistics: expect the unconstrained domain
    assertEquals(
            getDomain(dateColumn, DATE, 0, null, ID),
            Domain.all(DATE));

    // min == max: expect a single-value domain
    assertEquals(
            getDomain(dateColumn, DATE, 10, intColumnStats(100, 100), ID),
            singleValue(DATE, 100L));

    // min < max: expect the closed range [0, 100]
    assertEquals(
            getDomain(dateColumn, DATE, 10, intColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(DATE, 0L, true, 100L, true)), false));

    // min (200) above max (100) must surface as a corruption error with this exact message
    final String corruptionMessage = "Corrupted statistics for column \"[] required int32 DateColumn\" in Parquet file \"testFile\": [min: 200, max: 100, num_nulls: 0]";
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(dateColumn, DATE, 10, intColumnStats(200, 100), ID))
            .withMessage(corruptionMessage);
}
Use of org.apache.parquet.column.ColumnDescriptor in project presto by prestodb.
From the class TestTupleDomainParquetPredicate, method testTinyint.
/**
 * Checks the domains that {@code getDomain} returns for an INT32 column read as TINYINT,
 * and that statistics whose min exceeds max are rejected as corrupted.
 */
@Test
public void testTinyint()
        throws ParquetCorruptionException
{
    final ColumnDescriptor tinyintColumn = createColumnDescriptor(INT32, "TinyintColumn");

    // null statistics: expect the unconstrained domain
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 0, null, ID),
            Domain.all(TINYINT));

    // min == max: expect a single-value domain
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 10, longColumnStats(100, 100), ID),
            singleValue(TINYINT, 100L));

    // min < max: expect the closed range [0, 100]
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 10, longColumnStats(0, 100), ID),
            create(ValueSet.ofRanges(range(TINYINT, 0L, true, 100L, true)), false));

    // max of 2147483648 is one past Integer.MAX_VALUE: expect notNull rather than a range
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 20, longColumnStats(0, 2147483648L), ID),
            notNull(TINYINT));

    // statistics from longOnlyNullsStats: expect all values, with create(...)'s flag set to true
    assertEquals(
            getDomain(tinyintColumn, TINYINT, 20, longOnlyNullsStats(10), ID),
            create(ValueSet.all(TINYINT), true));

    // min (2147483648) above max (10) must surface as a corruption error with this exact message
    final String corruptionMessage = "Corrupted statistics for column \"[] required int32 TinyintColumn\" in Parquet file \"testFile\": [min: 2147483648, max: 10, num_nulls: 0]";
    assertThatExceptionOfType(ParquetCorruptionException.class)
            .isThrownBy(() -> getDomain(tinyintColumn, TINYINT, 10, longColumnStats(2147483648L, 10), ID))
            .withMessage(corruptionMessage);
}
Aggregations