use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
the class StripeReader method getRowGroupStatistics.
/**
 * Gathers the column statistics recorded for a single row group, one entry per column.
 *
 * <p>The result has exactly {@code types.size()} entries. A column that has no row group
 * indexes in {@code columnIndexes} is represented by a {@code null} entry, so positions in
 * the result line up with column ids.
 *
 * @param types the column types; only the number of columns is read here
 * @param columnIndexes row group indexes keyed by stream id; must not be null
 * @param rowGroup index of the row group to read within each column's index list; must not be negative
 * @return per-column statistics for the requested row group
 */
private static ColumnMetadata<ColumnStatistics> getRowGroupStatistics(ColumnMetadata<OrcType> types, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    // Re-key the indexes by the numeric column id so they can be looked up by position below.
    Map<Integer, List<RowGroupIndex>> indexesByColumnId = columnIndexes.entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getColumnId().getId(), Entry::getValue));

    int columnCount = types.size();
    List<ColumnStatistics> perColumn = new ArrayList<>(columnCount);
    for (int columnId = 0; columnId < columnCount; columnId++) {
        List<RowGroupIndex> indexes = indexesByColumnId.get(columnId);
        // Keep a null placeholder for columns without indexes so list positions stay aligned.
        perColumn.add(indexes == null ? null : indexes.get(rowGroup).getColumnStatistics());
    }
    return new ColumnMetadata<>(perColumn);
}
use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
the class AbstractOrcRecordReader method close.
/**
 * Closes the data source and every non-null column reader, then runs the write-validation
 * checks (row count, per-column hashes, stripe hash, file statistics) that are enabled for
 * this reader.
 *
 * @throws IOException declared by the signature; the resource cleanup and the validation
 *         helpers are the calls that can raise it
 */
@Override
public void close() throws IOException {
    // Resources are only registered inside the block; the Closer itself is closed by the
    // try-with-resources statement when the block exits.
    try (Closer resources = Closer.create()) {
        resources.register(orcDataSource);
        for (AbstractColumnReader reader : columnReaders) {
            if (reader == null) {
                continue;
            }
            resources.register(reader::close);
        }
    }

    if (writeChecksumBuilder.isPresent()) {
        OrcWriteValidation.WriteChecksum actual = writeChecksumBuilder.get().build();

        validateWrite(validation -> validation.getChecksum().getTotalRowCount() == actual.getTotalRowCount(), "Invalid row count");

        List<Long> actualHashes = actual.getColumnHashes();
        for (int position = 0; position < actualHashes.size(); position++) {
            // Lambdas can only capture effectively final locals, so copy the loop counter.
            int column = position;
            validateWrite(validation -> validation.getChecksum().getColumnHashes().get(column).equals(actualHashes.get(column)), "Invalid checksum for column %s", column);
        }

        validateWrite(validation -> validation.getChecksum().getStripeHash() == actual.getStripeHash(), "Invalid stripes checksum");
    }

    if (fileStatisticsValidation.isPresent()) {
        // NOTE(review): writeValidation.get() is not guarded here; presumably file statistics
        // validation is only set up when write validation is present - confirm at construction.
        Optional<ColumnMetadata<ColumnStatistics>> actualFileStatistics = fileStatisticsValidation.get().build();
        writeValidation.get().validateFileStatistics(orcDataSource.getId(), actualFileStatistics);
    }
}
use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
the class TestOrcBloomFilters method testMatches.
/**
 * Simulates a query over two columns where one of them is used in the WHERE clause,
 * and checks predicate matching with and without a bloom filter.
 */
@Test
public void testMatches() {
    // Assemble a matching and a non-matching bloom filter. The empty one is converted
    // before 1234 is added, and is expected not to contain that value.
    HashableBloomFilter filter = new HashableBloomFilter(1000, 0.01);
    OrcProto.BloomFilter orcFilterWithoutValue = toOrcBloomFilter(filter);
    filter.addLong(1234);
    OrcProto.BloomFilter orcFilterWithValue = toOrcBloomFilter(filter);

    TupleDomainOrcPredicate emptyPredicate = TupleDomainOrcPredicate.builder().build();
    TupleDomainOrcPredicate predicate = TupleDomainOrcPredicate.builder()
            .setBloomFiltersEnabled(true)
            .addColumn(ROOT_COLUMN, Domain.singleValue(BIGINT, 1234L))
            .build();

    // All three statistics share the same integer range [10, 2000]; only the bloom filter differs.
    ColumnMetadata<ColumnStatistics> matchingStats = new ColumnMetadata<>(ImmutableList.of(
            new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, toBloomFilter(orcFilterWithValue))));
    ColumnMetadata<ColumnStatistics> nonMatchingStats = new ColumnMetadata<>(ImmutableList.of(
            new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, toBloomFilter(orcFilterWithoutValue))));
    ColumnMetadata<ColumnStatistics> statsWithoutBloomFilter = new ColumnMetadata<>(ImmutableList.of(
            new ColumnStatistics(null, 0, null, new IntegerStatistics(10L, 2000L, null), null, null, null, null, null, null)));

    assertTrue(predicate.matches(1L, matchingStats));
    assertTrue(predicate.matches(1L, statsWithoutBloomFilter));
    assertFalse(predicate.matches(1L, nonMatchingStats));
    assertTrue(emptyPredicate.matches(1L, matchingStats));
}
use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
the class OrcWriter method toFileStats.
/**
 * Merges per-stripe column statistics into file-level column statistics.
 *
 * <p>For each column position, the statistics from every stripe are passed to
 * {@code ColumnStatistics.mergeColumnStatistics}. All stripes must report the same
 * number of columns as the first stripe.
 *
 * @param stripes column statistics of each stripe, in stripe order
 * @return the merged statistics, or an empty {@code Optional} when there are no stripes
 */
private static Optional<ColumnMetadata<ColumnStatistics>> toFileStats(List<ColumnMetadata<ColumnStatistics>> stripes) {
    if (stripes.isEmpty()) {
        return Optional.empty();
    }

    int columnCount = stripes.get(0).size();
    checkArgument(stripes.stream().noneMatch(stripe -> stripe.size() != columnCount));

    ImmutableList.Builder<ColumnStatistics> merged = ImmutableList.builder();
    for (int column = 0; column < columnCount; column++) {
        OrcColumnId columnId = new OrcColumnId(column);
        // Gather this column's statistics from every stripe, preserving stripe order.
        List<ColumnStatistics> perStripe = new ArrayList<>(stripes.size());
        for (ColumnMetadata<ColumnStatistics> stripe : stripes) {
            perStripe.add(stripe.get(columnId));
        }
        merged.add(ColumnStatistics.mergeColumnStatistics(perStripe));
    }
    return Optional.of(new ColumnMetadata<>(merged.build()));
}
use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
the class OrcWriter method bufferFileFooter.
/**
 * Collect the data for the file footer. This is not the actual data, but
 * instead are functions that know how to write the data.
 *
 * <p>The returned list holds, in order: the stripe metadata section, the footer, the
 * postscript, and a final buffer containing the postscript length cast to an unsigned byte.
 * As a side effect the pre-close callback (if any) is invoked, validation data is recorded,
 * and the list of closed stripes is cleared.
 *
 * @return the outputs that make up the file tail, in write order
 * @throws IOException declared by the signature for the metadata writing calls
 */
private List<OrcDataOutput> bufferFileFooter() throws IOException {
    if (preCloseCallback.isPresent()) {
        try {
            preCloseCallback.get().call();
        } catch (Exception e) {
            // Best effort: a failing callback does not stop the footer from being written,
            // but the failure itself is included in the log so it can be diagnosed.
            log.debug("Call pre close call back error: " + e);
        }
    }

    List<OrcDataOutput> outputData = new ArrayList<>();

    // Stripe-level statistics section.
    Metadata metadata = new Metadata(closedStripes.stream().map(ClosedStripe::getStatistics).map(Optional::of).collect(toList()));
    Slice metadataSlice = metadataWriter.writeMetadata(metadata);
    outputData.add(createDataOutput(metadataSlice));

    // File-level totals derived from the closed stripes.
    long numberOfRows = closedStripes.stream().mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()).sum();
    Optional<ColumnMetadata<ColumnStatistics>> fileStats = toFileStats(closedStripes.stream().map(ClosedStripe::getStatistics).map(StripeStatistics::getColumnStatistics).collect(toList()));
    recordValidation(validation -> validation.setFileStatistics(fileStats));

    Map<String, Slice> localUserMetadata = this.userMetadata.entrySet().stream().collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));
    Footer footer = new Footer(numberOfRows, rowGroupMaxRowCount, closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toImmutableList()), orcTypes, fileStats, localUserMetadata);

    // Everything needed from the closed stripes has been captured in the footer above.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;

    Slice footerSlice = metadataWriter.writeFooter(footer);
    outputData.add(createDataOutput(footerSlice));
    recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));

    // The postscript records the footer and metadata lengths; it is followed by its own length.
    Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), compression, maxCompressionBufferSize);
    outputData.add(createDataOutput(postscriptSlice));
    outputData.add(createDataOutput(Slices.wrappedBuffer(UnsignedBytes.checkedCast(postscriptSlice.length()))));
    return outputData;
}
Aggregations