use of io.prestosql.orc.metadata.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class StripeReader method readColumnIndexes.
/**
 * Reads the row group indexes of every {@code ROW_INDEX} stream found in {@code streams}.
 *
 * <p>When the row index cache is enabled, the indexes are obtained through the cache, keyed by
 * data source id + last-modified time, stripe offset and stream id; otherwise they are parsed
 * directly from the stream data. If bloom filters exist for the stream's column, each row group
 * index is rebuilt with its column statistics carrying the bloom filter at the same position.
 *
 * @param streams all streams of the stripe, keyed by stream id; non-{@code ROW_INDEX} streams are skipped
 * @param streamsData chunk loaders providing the raw bytes for each stream
 * @param bloomFilterIndexes bloom filters per column; a column may be absent or map to an empty list
 * @param stripe the stripe being read; its offset is part of the cache key
 * @return row group indexes keyed by the id of the {@code ROW_INDEX} stream they were read from
 * @throws IOException if reading the row indexes fails
 */
private Map<StreamId, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcChunkLoader> streamsData, Map<OrcColumnId, List<HashableBloomFilter>> bloomFilterIndexes, StripeInformation stripe) throws IOException {
    ImmutableMap.Builder<StreamId, List<RowGroupIndex>> indexesByStream = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        // Only row index streams are of interest here.
        if (streamEntry.getValue().getStreamKind() != ROW_INDEX) {
            continue;
        }

        OrcInputStream indexInput = new OrcInputStream(streamsData.get(streamEntry.getKey()));
        List<HashableBloomFilter> columnBloomFilters = bloomFilterIndexes.get(streamEntry.getKey().getColumnId());

        List<RowGroupIndex> indexes;
        if (!orcCacheProperties.isRowIndexCacheEnabled()) {
            indexes = metadataReader.readRowIndexes(hiveWriterVersion, indexInput);
        } else {
            OrcRowIndexCacheKey cacheKey = new OrcRowIndexCacheKey();
            cacheKey.setOrcDataSourceId(new OrcDataSourceIdWithTimeStamp(orcDataSource.getId(), orcDataSource.getLastModifiedTime()));
            cacheKey.setStripeOffset(stripe.getOffset());
            cacheKey.setStreamId(streamEntry.getKey());
            try {
                indexes = orcCacheStore.getRowIndexCache().get(cacheKey, () -> metadataReader.readRowIndexes(hiveWriterVersion, indexInput));
            } catch (UncheckedExecutionException | ExecutionException cacheFailure) {
                handleCacheLoadException(cacheFailure);
                log.debug(cacheFailure.getCause(), "Error while caching row group indexes. Falling back to default flow");
                // NOTE(review): the fallback reads from the same input stream the cache loader
                // used; presumably it has not been consumed when the load fails - confirm.
                indexes = metadataReader.readRowIndexes(hiveWriterVersion, indexInput);
            }
        }

        if (columnBloomFilters != null && !columnBloomFilters.isEmpty()) {
            // Pair each row group index with the bloom filter at the same position.
            ImmutableList.Builder<RowGroupIndex> withBloomFilters = ImmutableList.builder();
            int position = 0;
            for (RowGroupIndex index : indexes) {
                ColumnStatistics enrichedStatistics = index.getColumnStatistics().withBloomFilter(columnBloomFilters.get(position));
                withBloomFilters.add(new RowGroupIndex(index.getPositions(), enrichedStatistics));
                position++;
            }
            indexes = withBloomFilters.build();
        }

        indexesByStream.put(streamEntry.getKey(), indexes);
    }
    return indexesByStream.build();
}
use of io.prestosql.orc.metadata.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class StripeReader method getRowGroupStatistics.
/**
 * Collects, for one row group, the column statistics of every column in {@code types}.
 *
 * <p>The supplied indexes are first re-keyed by the numeric column id of their stream id. Each
 * position {@code 0 .. types.size() - 1} is then looked up in that map: when indexes exist, the
 * statistics of the entry at {@code rowGroup} are used; otherwise the slot is {@code null}.
 *
 * @param types the column types; only the size is used to determine how many slots to produce
 * @param columnIndexes row group indexes keyed by stream id; must not be {@code null}
 * @param rowGroup zero-based row group position; must not be negative
 * @return column metadata holding one (possibly {@code null}) statistics entry per column
 */
private static ColumnMetadata<ColumnStatistics> getRowGroupStatistics(ColumnMetadata<OrcType> types, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    Map<Integer, List<RowGroupIndex>> indexesByColumnId = columnIndexes.entrySet().stream()
            .collect(toImmutableMap(indexEntry -> indexEntry.getKey().getColumnId().getId(), Entry::getValue));

    List<ColumnStatistics> statisticsPerColumn = new ArrayList<>(types.size());
    for (int column = 0; column < types.size(); column++) {
        List<RowGroupIndex> indexes = indexesByColumnId.get(column);
        // Columns without row group indexes keep a null placeholder so positions stay aligned.
        statisticsPerColumn.add(indexes == null ? null : indexes.get(rowGroup).getColumnStatistics());
    }
    return new ColumnMetadata<>(statisticsPerColumn);
}
use of io.prestosql.orc.metadata.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class FloatColumnWriter method getIndexStreams.
/**
 * Produces the {@code ROW_INDEX} stream for this float column.
 *
 * <p>For every recorded row group, the data checkpoint and the optional present checkpoint at
 * the same position are converted into a position list and combined with that row group's
 * column statistics. The resulting indexes are serialized with {@code metadataWriter}.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return a single-element list containing the serialized row index stream
 * @throws IOException if serializing the row indexes fails
 * @throws IllegalStateException if the writer has not been closed yet
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    List<FloatStreamCheckpoint> dataPositions = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics groupStatistics = rowGroupColumnStatistics.get(current);
        FloatStreamCheckpoint dataCheckpoint = dataPositions.get(current);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentPositions.map(all -> all.get(current));
        indexBuilder.add(new RowGroupIndex(
                createFloatColumnPositionList(compressed, dataCheckpoint, presentCheckpoint),
                groupStatistics));
    }

    Slice serializedIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    return ImmutableList.of(new StreamDataOutput(
            serializedIndexes,
            new Stream(columnId, StreamKind.ROW_INDEX, serializedIndexes.length(), false)));
}
use of io.prestosql.orc.metadata.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class FloatColumnWriter method finishRowGroup.
/**
 * Completes the current row group of this float column.
 *
 * <p>Builds the statistics accumulated so far, appends them to the per-row-group statistics
 * list, and replaces the statistics builder with a new {@code DoubleStatisticsBuilder} for
 * the next row group.
 *
 * @return a single-entry map from this writer's column id to the finished row group statistics
 * @throws IllegalStateException if the writer has already been closed
 */
@Override
public Map<OrcColumnId, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics finishedGroupStatistics = statisticsBuilder.buildColumnStatistics();
    rowGroupColumnStatistics.add(finishedGroupStatistics);

    // Begin accumulating statistics for the next row group from scratch.
    statisticsBuilder = new DoubleStatisticsBuilder();

    return ImmutableMap.of(columnId, finishedGroupStatistics);
}
use of io.prestosql.orc.metadata.statistics.ColumnStatistics in project hetu-core by openlookeng.
the class ListColumnWriter method getIndexStreams.
/**
 * Produces the index streams for this list column followed by those of its element writer.
 *
 * <p>For every recorded row group, the length checkpoint and the optional present checkpoint
 * at the same position are converted into a position list and combined with that row group's
 * column statistics. The serialized {@code ROW_INDEX} stream of this column comes first in the
 * result, followed by whatever index streams the element writer returns.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return this column's row index stream followed by the element writer's index streams
 * @throws IOException if serializing the row indexes fails
 * @throws IllegalStateException if the writer has not been closed yet
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    List<LongStreamCheckpoint> lengthPositions = lengthStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics groupStatistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint lengthCheckpoint = lengthPositions.get(current);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentPositions.map(all -> all.get(current));
        indexBuilder.add(new RowGroupIndex(
                createArrayColumnPositionList(compressed, lengthCheckpoint, presentCheckpoint),
                groupStatistics));
    }

    Slice serializedIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, serializedIndexes.length(), false);

    // Own row index first, then the nested element writer's index streams.
    return ImmutableList.<StreamDataOutput>builder()
            .add(new StreamDataOutput(serializedIndexes, rowIndexStream))
            .addAll(elementWriter.getIndexStreams(metadataWriter))
            .build();
}
Aggregations