use of io.trino.orc.metadata.statistics.IntegerStatistics in project trino by trinodb.
the class TestOrcReaderPositions method testRowGroupSkipping.
@Test
public void testRowGroupSkipping() throws Exception {
try (TempFile tempFile = new TempFile()) {
// create single strip file with multiple row groups
int rowCount = 142_000;
createSequentialFile(tempFile.getFile(), rowCount);
// test reading two row groups from middle of file
OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
if (numberOfRows == rowCount) {
return true;
}
IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
return (stats.getMin() == 50_000) || (stats.getMin() == 60_000);
};
try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
assertEquals(reader.getFileRowCount(), rowCount);
assertEquals(reader.getReaderRowCount(), rowCount);
assertEquals(reader.getFilePosition(), 0);
assertEquals(reader.getReaderPosition(), 0);
long position = 50_000;
while (true) {
Page page = reader.nextPage();
if (page == null) {
break;
}
page = page.getLoadedPage();
Block block = page.getBlock(0);
for (int i = 0; i < block.getPositionCount(); i++) {
assertEquals(BIGINT.getLong(block, i), position + i);
}
assertEquals(reader.getFilePosition(), position);
assertEquals(reader.getReaderPosition(), position);
position += page.getPositionCount();
}
assertEquals(position, 70_000);
assertEquals(reader.getFilePosition(), rowCount);
assertEquals(reader.getReaderPosition(), rowCount);
}
}
}
use of io.trino.orc.metadata.statistics.IntegerStatistics in project trino by trinodb.
the class TestOrcReaderPositions method testStripeSkipping.
@Test
public void testStripeSkipping() throws Exception {
try (TempFile tempFile = new TempFile()) {
createMultiStripeFile(tempFile.getFile());
// test reading second and fourth stripes
OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
if (numberOfRows == 100) {
return true;
}
IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
};
try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
assertEquals(reader.getFileRowCount(), 100);
assertEquals(reader.getReaderRowCount(), 40);
assertEquals(reader.getFilePosition(), 0);
assertEquals(reader.getReaderPosition(), 0);
// second stripe
Page page = reader.nextPage().getLoadedPage();
assertEquals(page.getPositionCount(), 20);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFilePosition(), 20);
assertCurrentBatch(page, 1);
// fourth stripe
page = reader.nextPage().getLoadedPage();
assertEquals(page.getPositionCount(), 20);
assertEquals(reader.getReaderPosition(), 20);
assertEquals(reader.getFilePosition(), 60);
assertCurrentBatch(page, 3);
page = reader.nextPage();
assertNull(page);
assertEquals(reader.getReaderPosition(), 40);
assertEquals(reader.getFilePosition(), 100);
}
}
}
use of io.trino.orc.metadata.statistics.IntegerStatistics in project trino by trinodb.
the class TestOrcBloomFilters method testMatchesExpandedRange.
@Test
public void testMatchesExpandedRange() {
Range range = Range.range(BIGINT, 1233L, true, 1235L, true);
TupleDomainOrcPredicate predicate = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(true).addColumn(ROOT_COLUMN, Domain.create(ValueSet.ofRanges(range), false)).setDomainCompactionThreshold(100).build();
ColumnMetadata<ColumnStatistics> matchingStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, new Utf8BloomFilterBuilder(1000, 0.01).addLong(1234L).buildBloomFilter())));
ColumnMetadata<ColumnStatistics> nonMatchingStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, new Utf8BloomFilterBuilder(1000, 0.01).addLong(9876L).buildBloomFilter())));
assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex));
assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex));
}
use of io.trino.orc.metadata.statistics.IntegerStatistics in project trino by trinodb.
the class TestOrcBloomFilters method testMatches.
@Test
public // simulate query on 2 columns where 1 is used as part of the where, with and without bloom filter
void testMatches() {
TupleDomainOrcPredicate predicate = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(true).addColumn(ROOT_COLUMN, Domain.singleValue(BIGINT, 1234L)).build();
TupleDomainOrcPredicate emptyPredicate = TupleDomainOrcPredicate.builder().build();
ColumnMetadata<ColumnStatistics> matchingStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, new Utf8BloomFilterBuilder(1000, 0.01).addLong(1234L).buildBloomFilter())));
ColumnMetadata<ColumnStatistics> nonMatchingStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, new Utf8BloomFilterBuilder(1000, 0.01).buildBloomFilter())));
ColumnMetadata<ColumnStatistics> withoutBloomFilterStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, null)));
assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex));
assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex));
assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex));
assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
}
use of io.trino.orc.metadata.statistics.IntegerStatistics in project trino by trinodb.
the class TestOrcBloomFilters method testMatchesNonExpandedRange.
@Test
public void testMatchesNonExpandedRange() {
ColumnMetadata<ColumnStatistics> matchingStatisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null, new Utf8BloomFilterBuilder(1000, 0.01).addLong(1500L).buildBloomFilter())));
Range range = Range.range(BIGINT, 1233L, true, 1235L, true);
TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder builder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(true).addColumn(ROOT_COLUMN, Domain.create(ValueSet.ofRanges(range), false));
// Domain expansion doesn't take place -> no bloom filtering -> ranges overlap
assertTrue(builder.setDomainCompactionThreshold(1).build().matches(1L, matchingStatisticsByColumnIndex));
assertFalse(builder.setDomainCompactionThreshold(100).build().matches(1L, matchingStatisticsByColumnIndex));
}
Aggregations