Use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.
From the class OrcReader, the method createOrcColumn:
private static OrcColumn createOrcColumn(String parentStreamName, String fieldName, OrcColumnId columnId, ColumnMetadata<OrcType> types, OrcDataSourceId orcDataSourceId)
{
    String path = fieldName.isEmpty() ? parentStreamName : parentStreamName + "." + fieldName;
    OrcType orcType = types.get(columnId);
    List<OrcColumn> nestedColumns = ImmutableList.of();
    if (orcType.getOrcTypeKind() == OrcTypeKind.STRUCT) {
        // one child column per struct field, named after the field
        nestedColumns = IntStream.range(0, orcType.getFieldCount())
                .mapToObj(fieldId -> createOrcColumn(path, orcType.getFieldName(fieldId), orcType.getFieldTypeIndex(fieldId), types, orcDataSourceId))
                .collect(toImmutableList());
    }
    else if (orcType.getOrcTypeKind() == OrcTypeKind.LIST) {
        // lists expose a single synthetic child column named "item"
        nestedColumns = ImmutableList.of(createOrcColumn(path, "item", orcType.getFieldTypeIndex(0), types, orcDataSourceId));
    }
    else if (orcType.getOrcTypeKind() == OrcTypeKind.MAP) {
        // maps expose two synthetic child columns, "key" and "value"
        nestedColumns = ImmutableList.of(
                createOrcColumn(path, "key", orcType.getFieldTypeIndex(0), types, orcDataSourceId),
                createOrcColumn(path, "value", orcType.getFieldTypeIndex(1), types, orcDataSourceId));
    }
    return new OrcColumn(path, columnId, fieldName, orcType.getOrcTypeKind(), orcDataSourceId, nestedColumns);
}
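For context, a hedged sketch of how this recursion is typically seeded from inside OrcReader. The variable names and accessor calls below are assumptions for illustration, not quoted hetu-core code:

    // Seed the recursion at the synthetic root column: the footer's flat
    // type list is unfolded into a tree of OrcColumn nodes.
    ColumnMetadata<OrcType> footerTypes = footer.getTypes();
    OrcColumn rootColumn = createOrcColumn("", "", OrcColumnId.ROOT_COLUMN, footerTypes, orcDataSource.getId());
    // the file's top-level columns are the root struct's children
    List<OrcColumn> topLevelColumns = rootColumn.getNestedColumns();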
Use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.
From the class StripeReader, the method getRowGroupStatistics:
private static ColumnMetadata<ColumnStatistics> getRowGroupStatistics(ColumnMetadata<OrcType> types, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup)
{
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");
    Map<Integer, List<RowGroupIndex>> rowGroupIndexesByColumn = columnIndexes.entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getColumnId().getId(), Entry::getValue));
    List<ColumnStatistics> statistics = new ArrayList<>(types.size());
    for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) {
        List<RowGroupIndex> rowGroupIndexes = rowGroupIndexesByColumn.get(columnIndex);
        if (rowGroupIndexes != null) {
            statistics.add(rowGroupIndexes.get(rowGroup).getColumnStatistics());
        }
        else {
            // no row index was read for this column; keep positions aligned with a null
            statistics.add(null);
        }
    }
    return new ColumnMetadata<>(statistics);
}
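The interesting detail is the null padding: the result stays positionally aligned with the type list even when a column carried no row index. A minimal standalone model of that alignment step, using plain JDK types (illustration only, not hetu-core code):

import java.util.*;

public class RowGroupAlignment
{
    public static void main(String[] args)
    {
        // pretend column 1 is the only column with a row index; 3 columns total
        Map<Integer, List<String>> indexesByColumn = Map.of(1, List.of("group0-stats", "group1-stats"));
        int columnCount = 3;
        int rowGroup = 1;
        List<String> aligned = new ArrayList<>();
        for (int col = 0; col < columnCount; col++) {
            List<String> groups = indexesByColumn.get(col);
            aligned.add(groups == null ? null : groups.get(rowGroup));
        }
        System.out.println(aligned); // [null, group1-stats, null]
    }
}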
Use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.
From the class ColumnWriters, the method createColumnWriter:
public static ColumnWriter createColumnWriter(OrcColumnId columnId, ColumnMetadata<OrcType> orcTypes, Type type, CompressionKind compression, int bufferSize, DataSize stringStatisticsLimit)
{
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnId);
    switch (orcType.getOrcTypeKind()) {
        case BOOLEAN:
            return new BooleanColumnWriter(columnId, type, compression, bufferSize);
        case FLOAT:
            return new FloatColumnWriter(columnId, type, compression, bufferSize);
        case DOUBLE:
            return new DoubleColumnWriter(columnId, type, compression, bufferSize);
        case BYTE:
            return new ByteColumnWriter(columnId, type, compression, bufferSize);
        case DATE:
            return new LongColumnWriter(columnId, type, compression, bufferSize, DateStatisticsBuilder::new);
        case SHORT:
        case INT:
        case LONG:
            return new LongColumnWriter(columnId, type, compression, bufferSize, IntegerStatisticsBuilder::new);
        case DECIMAL:
            return new DecimalColumnWriter(columnId, type, compression, bufferSize);
        case TIMESTAMP:
            return new TimestampColumnWriter(columnId, type, compression, bufferSize);
        case BINARY:
            return new SliceDirectColumnWriter(columnId, type, compression, bufferSize, BinaryStatisticsBuilder::new);
        case CHAR:
        case VARCHAR:
        case STRING:
            return new SliceDictionaryColumnWriter(columnId, type, compression, bufferSize, stringStatisticsLimit);
        case LIST: {
            OrcColumnId fieldColumnIndex = orcType.getFieldTypeIndex(0);
            Type fieldType = type.getTypeParameters().get(0);
            ColumnWriter elementWriter = createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, bufferSize, stringStatisticsLimit);
            return new ListColumnWriter(columnId, compression, bufferSize, elementWriter);
        }
        case MAP: {
            ColumnWriter keyWriter = createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), compression, bufferSize, stringStatisticsLimit);
            ColumnWriter valueWriter = createColumnWriter(orcType.getFieldTypeIndex(1), orcTypes, type.getTypeParameters().get(1), compression, bufferSize, stringStatisticsLimit);
            return new MapColumnWriter(columnId, compression, bufferSize, keyWriter, valueWriter);
        }
        case STRUCT: {
            ImmutableList.Builder<ColumnWriter> fieldWriters = ImmutableList.builder();
            for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) {
                OrcColumnId fieldColumnIndex = orcType.getFieldTypeIndex(fieldId);
                Type fieldType = type.getTypeParameters().get(fieldId);
                fieldWriters.add(createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, bufferSize, stringStatisticsLimit));
            }
            return new StructColumnWriter(columnId, compression, bufferSize, fieldWriters.build());
        }
    }
    throw new IllegalArgumentException("Unsupported type: " + type);
}
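A hedged usage sketch for a single top-level BIGINT column. The column id, buffer size, and statistics limit below are illustrative assumptions (column id 0 is the root struct, so the first real column is id 1), not defaults taken from hetu-core:

    ColumnWriter writer = ColumnWriters.createColumnWriter(
            new OrcColumnId(1),
            orcTypes, // ColumnMetadata<OrcType> built for the file schema
            BigintType.BIGINT,
            CompressionKind.ZLIB,
            262_144, // 256 KB buffer, an assumed value
            new DataSize(64, DataSize.Unit.KILOBYTE));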
Use of io.prestosql.orc.metadata.OrcType in project hetu-core by openlookeng.
From the class OrcSelectiveRecordReader, the method createColumnReaders:
public SelectiveColumnReader[] createColumnReaders(
        List<OrcColumn> fileColumns,
        AggregatedMemoryContext systemMemoryContext,
        OrcBlockFactory blockFactory,
        OrcCacheStore orcCacheStore,
        OrcCacheProperties orcCacheProperties,
        OrcPredicate predicate,
        Map<Integer, TupleDomainFilter> filters,
        DateTimeZone hiveStorageTimeZone,
        List<Integer> outputColumns,
        Map<Integer, Type> includedColumns,
        ColumnMetadata<OrcType> orcTypes,
        boolean useDataCache)
        throws OrcCorruptionException
{
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] columnReaders = new SelectiveColumnReader[fieldCount];
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();
    IntArraySet remainingColumns = new IntArraySet();
    remainingColumns.addAll(includedColumns.keySet());
    for (int i = 0; i < fieldCount; i++) {
        // create a column reader only for columns that are part of the projection or a filter
        if (includedColumns.containsKey(i)) {
            int columnIndex = i;
            OrcColumn column = fileColumns.get(columnIndex);
            boolean outputRequired = outputColumns.contains(i);
            SelectiveColumnReader columnReader = null;
            if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
                // serve decoded row data from the cache by wrapping a plain column reader
                ColumnReader cr = ColumnReaders.createColumnReader(includedColumns.get(i), column, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
                columnReader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(cr, column, orcCacheStore.getRowDataCache());
            }
            else {
                columnReader = createColumnReader(orcTypes.get(column.getColumnId()), column, Optional.ofNullable(filters.get(i)), outputRequired ? Optional.of(includedColumns.get(i)) : Optional.empty(), hiveStorageTimeZone, systemMemoryContext);
                if (orcCacheProperties.isRowDataCacheEnabled()) {
                    columnReader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(columnReader, column, predicate, orcCacheStore.getRowDataCache());
                }
            }
            columnReaders[columnIndex] = columnReader;
            // classify the column by the kind of filter attached to it
            if (filters.get(i) != null) {
                colReaderWithFilter.add(columnIndex);
            }
            else if (disjuctFilters.get(i) != null && disjuctFilters.get(i).size() > 0) {
                colReaderWithORFilter.add(columnIndex);
            }
            else {
                colReaderWithoutFilter.add(columnIndex);
            }
            remainingColumns.remove(columnIndex);
        }
    }
    // classify any still-remaining columns with negative indexes, excluding missing ones
    remainingColumns.removeAll(missingColumns);
    for (Integer col : remainingColumns) {
        if (col < 0) {
            // should always be true at this point
            if (filters.get(col) != null) {
                colReaderWithFilter.add(col);
            }
            else if (disjuctFilters.get(col) != null && disjuctFilters.get(col).size() > 0) {
                colReaderWithORFilter.add(col);
            }
        }
    }
    // handle the ALTER TABLE ADD COLUMN case: filters may reference columns missing from the file
    for (int missingColumn : missingColumns) {
        if (filters.get(missingColumn) != null) {
            colReaderWithFilter.add(missingColumn);
        }
        else if (disjuctFilters.get(missingColumn) != null && disjuctFilters.get(missingColumn).size() > 0) {
            colReaderWithORFilter.add(missingColumn);
        }
    }
    return columnReaders;
}
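The three IntArraySets split the projected columns by filter kind: columns with a conjunctive (AND) filter, columns with only disjunctive (OR) filters, and columns read without any filter. A standalone model of that three-way split, using plain JDK types (illustration only, not hetu-core code):

import java.util.*;

public class FilterSplit
{
    public static void main(String[] args)
    {
        Map<Integer, String> filters = Map.of(0, "c0 > 10"); // conjunctive filters
        Map<Integer, List<String>> orFilters = Map.of(1, List.of("c1 = 'a'", "c1 = 'b'"));
        Set<Integer> included = Set.of(0, 1, 2);
        Set<Integer> withFilter = new HashSet<>();
        Set<Integer> withOrFilter = new HashSet<>();
        Set<Integer> withoutFilter = new HashSet<>();
        for (int col : included) {
            if (filters.get(col) != null) {
                withFilter.add(col);
            }
            else if (orFilters.get(col) != null && !orFilters.get(col).isEmpty()) {
                withOrFilter.add(col);
            }
            else {
                withoutFilter.add(col);
            }
        }
        System.out.println(withFilter + " / " + withOrFilter + " / " + withoutFilter); // [0] / [1] / [2]
    }
}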