Use of io.trino.orc.metadata.ColumnMetadata in project trino by trinodb.
Class TestOrcBloomFilters, method testMatchesExpandedRange.
// Checks bloom-filter matching for the inclusive BIGINT range [1233, 1235] with a domain
// compaction threshold of 100: statistics whose bloom filter holds 1234 must match, while
// statistics whose bloom filter holds only 9876 must not.
@Test
public void testMatchesExpandedRange() {
    Domain rangeDomain = Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 1233L, true, 1235L, true)), false);
    TupleDomainOrcPredicate predicate = TupleDomainOrcPredicate.builder()
            .setBloomFiltersEnabled(true)
            .addColumn(ROOT_COLUMN, rangeDomain)
            .setDomainCompactionThreshold(100)
            .build();

    // Both statistics use the same integer min/max (10..2000), which covers the range;
    // only the value stored in the bloom filter differs.
    ColumnStatistics statisticsWithHit = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            new Utf8BloomFilterBuilder(1000, 0.01).addLong(1234L).buildBloomFilter());
    ColumnStatistics statisticsWithMiss = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            new Utf8BloomFilterBuilder(1000, 0.01).addLong(9876L).buildBloomFilter());

    ColumnMetadata<ColumnStatistics> matching = new ColumnMetadata<>(ImmutableList.of(statisticsWithHit));
    ColumnMetadata<ColumnStatistics> nonMatching = new ColumnMetadata<>(ImmutableList.of(statisticsWithMiss));

    assertTrue(predicate.matches(1L, matching));
    assertFalse(predicate.matches(1L, nonMatching));
}
Use of io.trino.orc.metadata.ColumnMetadata in project trino by trinodb.
Class TestOrcBloomFilters, method testMatches.
// simulate query on 2 columns where 1 is used as part of the where, with and without bloom filter
@Test
public void testMatches() {
    TupleDomainOrcPredicate emptyPredicate = TupleDomainOrcPredicate.builder().build();
    TupleDomainOrcPredicate predicate = TupleDomainOrcPredicate.builder()
            .setBloomFiltersEnabled(true)
            .addColumn(ROOT_COLUMN, Domain.singleValue(BIGINT, 1234L))
            .build();

    // Bloom filter that contains the value the predicate looks for.
    ColumnStatistics statisticsWithHit = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            new Utf8BloomFilterBuilder(1000, 0.01).addLong(1234L).buildBloomFilter());
    // Bloom filter with no values added at all.
    ColumnStatistics statisticsWithEmptyFilter = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            new Utf8BloomFilterBuilder(1000, 0.01).buildBloomFilter());
    // No bloom filter; only the integer min/max (10..2000) is available.
    ColumnStatistics statisticsWithoutFilter = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            null);

    ColumnMetadata<ColumnStatistics> matching = new ColumnMetadata<>(ImmutableList.of(statisticsWithHit));
    ColumnMetadata<ColumnStatistics> nonMatching = new ColumnMetadata<>(ImmutableList.of(statisticsWithEmptyFilter));
    ColumnMetadata<ColumnStatistics> withoutBloomFilter = new ColumnMetadata<>(ImmutableList.of(statisticsWithoutFilter));

    assertTrue(predicate.matches(1L, matching));
    assertTrue(predicate.matches(1L, withoutBloomFilter));
    assertFalse(predicate.matches(1L, nonMatching));
    // A predicate with no columns accepts the statistics as well.
    assertTrue(emptyPredicate.matches(1L, matching));
}
Use of io.trino.orc.metadata.ColumnMetadata in project trino by trinodb.
Class TestOrcBloomFilters, method testMatchesNonExpandedRange.
// Evaluates the same range predicate ([1233, 1235], inclusive) against the same statistics
// with two different domain compaction thresholds and expects different outcomes.
@Test
public void testMatchesNonExpandedRange() {
    // Integer min/max is 10..2000; the bloom filter holds only 1500, which lies outside the range.
    ColumnStatistics columnStatistics = new ColumnStatistics(
            null, 0, null,
            new IntegerStatistics(10L, 2000L, null),
            null, null, null, null, null, null,
            new Utf8BloomFilterBuilder(1000, 0.01).addLong(1500L).buildBloomFilter());
    ColumnMetadata<ColumnStatistics> statisticsByColumnIndex = new ColumnMetadata<>(ImmutableList.of(columnStatistics));

    Domain rangeDomain = Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 1233L, true, 1235L, true)), false);
    TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
            .setBloomFiltersEnabled(true)
            .addColumn(ROOT_COLUMN, rangeDomain);

    // Domain expansion doesn't take place -> no bloom filtering -> ranges overlap
    TupleDomainOrcPredicate lowThresholdPredicate = predicateBuilder.setDomainCompactionThreshold(1).build();
    assertTrue(lowThresholdPredicate.matches(1L, statisticsByColumnIndex));

    // With threshold 100 the same statistics are rejected (presumably the range is expanded
    // into discrete values that are then checked against the bloom filter).
    TupleDomainOrcPredicate highThresholdPredicate = predicateBuilder.setDomainCompactionThreshold(100).build();
    assertFalse(highThresholdPredicate.matches(1L, statisticsByColumnIndex));
}
Use of io.trino.orc.metadata.ColumnMetadata in project trino by trinodb.
Class StripeReader, method getRowGroupStatistics.
/**
 * Builds the per-column statistics for a single row group.
 *
 * <p>The result has one entry per column in {@code types}, positioned by column index.
 * A column that has no row group indexes in {@code columnIndexes} gets a {@code null} entry.
 *
 * @param types the column types; only their count is used here
 * @param columnIndexes row group indexes keyed by stream id; must not be null
 * @param rowGroup zero-based position of the row group within each index list; must not be negative
 * @return column statistics of the requested row group, indexed by column
 */
private static ColumnMetadata<ColumnStatistics> getRowGroupStatistics(ColumnMetadata<OrcType> types, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    // Re-key the indexes by the numeric column id so they can be looked up by position below.
    Map<Integer, List<RowGroupIndex>> indexesByColumnId = columnIndexes.entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getColumnId().getId(), Entry::getValue));

    int columnCount = types.size();
    List<ColumnStatistics> result = new ArrayList<>(columnCount);
    for (int columnId = 0; columnId < columnCount; columnId++) {
        List<RowGroupIndex> indexes = indexesByColumnId.get(columnId);
        // Keep positions aligned with the column ids: a column without indexes contributes null.
        result.add(indexes == null ? null : indexes.get(rowGroup).getColumnStatistics());
    }
    return new ColumnMetadata<>(result);
}
Use of io.trino.orc.metadata.ColumnMetadata in project trino by trinodb.
Class StripeReader, method createRowGroups.
/**
 * Creates a {@code RowGroup} for every selected row group of a stripe.
 *
 * <p>For each selected id this resolves the stream checkpoints, derives the row offset and
 * row count of the group, and sums the minimum average value size over all column indexes.
 *
 * @param rowsInStripe total number of rows in the stripe
 * @param streams streams of the stripe keyed by stream id
 * @param valueStreams value input streams keyed by stream id
 * @param columnIndexes row group indexes keyed by stream id
 * @param selectedRowGroups ids of the row groups to create
 * @param encodings column encodings used when resolving checkpoints
 * @return the created row groups, in the iteration order of {@code selectedRowGroups}
 * @throws InvalidCheckpointException declared by this method; presumably raised while resolving
 *         the stream checkpoints
 * @throws IllegalStateException if the rows-per-row-group value is not present
 */
private List<RowGroup> createRowGroups(int rowsInStripe, Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, List<RowGroupIndex>> columnIndexes, Set<Integer> selectedRowGroups, ColumnMetadata<ColumnEncoding> encodings) throws InvalidCheckpointException {
    int groupSize = this.rowsInRowGroup.orElseThrow(() -> new IllegalStateException("Cannot create row groups if row group info is missing"));

    ImmutableList.Builder<RowGroup> rowGroups = ImmutableList.builder();
    for (int rowGroupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> checkpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), rowGroupId, encodings, streams, columnIndexes);

        int firstRow = rowGroupId * groupSize;
        // The last row group of a stripe may hold fewer rows than the nominal group size.
        int rowCount = Math.min(rowsInStripe - firstRow, groupSize);

        // Sum the minimum average value size of this row group across all column indexes.
        long minAverageRowBytes = 0;
        for (List<RowGroupIndex> indexes : columnIndexes.values()) {
            minAverageRowBytes += indexes.get(rowGroupId).getColumnStatistics().getMinAverageValueSizeInBytes();
        }

        rowGroups.add(createRowGroup(rowGroupId, firstRow, rowCount, minAverageRowBytes, valueStreams, checkpoints));
    }
    return rowGroups.build();
}
Aggregations