Use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
From the class OrcSelectiveRecordReader, the method createColumnReaders:
public SelectiveColumnReader[] createColumnReaders(
        List<OrcColumn> fileColumns,
        AggregatedMemoryContext systemMemoryContext,
        OrcBlockFactory blockFactory,
        OrcCacheStore orcCacheStore,
        OrcCacheProperties orcCacheProperties,
        OrcPredicate predicate,
        Map<Integer, TupleDomainFilter> filters,
        DateTimeZone hiveStorageTimeZone,
        List<Integer> outputColumns,
        Map<Integer, Type> includedColumns,
        ColumnMetadata<OrcType> orcTypes,
        boolean useDataCache)
        throws OrcCorruptionException
{
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] columnReaders = new SelectiveColumnReader[fieldCount];
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();
    IntArraySet remainingColumns = new IntArraySet();
    remainingColumns.addAll(includedColumns.keySet());

    for (int i = 0; i < fieldCount; i++) {
        // Create a column reader only for columns that are part of the projection or a filter.
        if (includedColumns.containsKey(i)) {
            int columnIndex = i;
            OrcColumn column = fileColumns.get(columnIndex);
            boolean outputRequired = outputColumns.contains(i);
            SelectiveColumnReader columnReader = null;

            if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
                ColumnReader cr = ColumnReaders.createColumnReader(
                        includedColumns.get(i),
                        column,
                        systemMemoryContext,
                        blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
                columnReader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(cr, column, orcCacheStore.getRowDataCache());
            }
            else {
                columnReader = createColumnReader(
                        orcTypes.get(column.getColumnId()),
                        column,
                        Optional.ofNullable(filters.get(i)),
                        outputRequired ? Optional.of(includedColumns.get(i)) : Optional.empty(),
                        hiveStorageTimeZone,
                        systemMemoryContext);
                if (orcCacheProperties.isRowDataCacheEnabled()) {
                    columnReader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(columnReader, column, predicate, orcCacheStore.getRowDataCache());
                }
            }
            columnReaders[columnIndex] = columnReader;

            if (filters.get(i) != null) {
                colReaderWithFilter.add(columnIndex);
            }
            else if (disjuctFilters.get(i) != null && disjuctFilters.get(i).size() > 0) {
                colReaderWithORFilter.add(columnIndex);
            }
            else {
                colReaderWithoutFilter.add(columnIndex);
            }
            remainingColumns.remove(columnIndex);
        }
    }

    /* Handle any columns still remaining with colIdx < 0. */
    remainingColumns.removeAll(missingColumns);
    for (Integer col : remainingColumns) {
        if (col < 0) {
            /* This should always be true. */
            if (filters.get(col) != null) {
                colReaderWithFilter.add(col);
            }
            else if (disjuctFilters.get(col) != null && disjuctFilters.get(col).size() > 0) {
                colReaderWithORFilter.add(col);
            }
        }
    }

    // Special handling for the ALTER TABLE ... ADD COLUMN case:
    for (int missingColumn : missingColumns) {
        if (filters.get(missingColumn) != null) {
            colReaderWithFilter.add(missingColumn);
        }
        else if (disjuctFilters.get(missingColumn) != null && disjuctFilters.get(missingColumn).size() > 0) {
            colReaderWithORFilter.add(missingColumn);
        }
    }
    return columnReaders;
}
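The interesting side effect of createColumnReaders is the three-way partition of column indexes into colReaderWithFilter (columns with a conjunctive TupleDomainFilter), colReaderWithORFilter (columns whose filters form a disjunction), and colReaderWithoutFilter (projection-only columns). The sketch below isolates just that partitioning step. It is a minimal, self-contained approximation: plain java.util sets stand in for fastutil's IntArraySet, hypothetical IntPredicate values stand in for TupleDomainFilter, and the column indexes and filters are made up for illustration.

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.IntPredicate;

public class FilterPartitionSketch
{
    public static void main(String[] args)
    {
        // Hypothetical per-column conjunctive (AND) filters, keyed by column index.
        Map<Integer, IntPredicate> filters = new HashMap<>();
        filters.put(0, v -> v > 10);

        // Hypothetical per-column disjunctive (OR) filter lists, keyed by column index.
        Map<Integer, List<IntPredicate>> disjunctFilters = new HashMap<>();
        disjunctFilters.put(1, List.of(v -> v == 3, v -> v == 7));

        Set<Integer> includedColumns = Set.of(0, 1, 2);

        // Partition the columns the same way createColumnReaders does.
        Set<Integer> withFilter = new HashSet<>();
        Set<Integer> withOrFilter = new HashSet<>();
        Set<Integer> withoutFilter = new HashSet<>();
        for (int col : includedColumns) {
            if (filters.get(col) != null) {
                withFilter.add(col);
            }
            else if (disjunctFilters.get(col) != null && !disjunctFilters.get(col).isEmpty()) {
                withOrFilter.add(col);
            }
            else {
                withoutFilter.add(col);
            }
        }
        System.out.println("AND-filtered columns: " + withFilter);   // [0]
        System.out.println("OR-filtered columns:  " + withOrFilter); // [1]
        System.out.println("unfiltered columns:   " + withoutFilter); // [2]
    }
}

A plausible motivation for the split, not visible in this snippet alone, is evaluation order: deterministic AND filters can be applied first to shrink the set of matching row positions before the more expensive OR filters are evaluated and the unfiltered projection columns are read.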
Use of io.prestosql.orc.metadata.ColumnMetadata in project hetu-core by openlookeng.
From the class StripeReader, the method createRowGroups:
private List<RowGroup> createRowGroups(
        int rowsInStripe,
        Map<StreamId, Stream> streams,
        Map<StreamId, ValueInputStream<?>> valueStreams,
        Map<StreamId, List<RowGroupIndex>> columnIndexes,
        Set<Integer> selectedRowGroups,
        ColumnMetadata<ColumnEncoding> encodings)
        throws InvalidCheckpointException
{
    ImmutableList.Builder<RowGroup> rowGroupBuilder = ImmutableList.builder();

    for (int rowGroupId : selectedRowGroups) {
        Map<StreamId, StreamCheckpoint> checkpoints = getStreamCheckpoints(includedOrcColumnIds, types, decompressor.isPresent(), rowGroupId, encodings, streams, columnIndexes);
        int rowOffset = rowGroupId * rowsInRowGroup;
        int rowsInGroup = Math.min(rowsInStripe - rowOffset, rowsInRowGroup);
        long minAverageRowBytes = columnIndexes.entrySet().stream()
                .mapToLong(e -> e.getValue().get(rowGroupId).getColumnStatistics().getMinAverageValueSizeInBytes())
                .sum();
        rowGroupBuilder.add(createRowGroup(rowGroupId, rowOffset, rowsInGroup, minAverageRowBytes, valueStreams, checkpoints));
    }
    return rowGroupBuilder.build();
}