Examples with OPTIONAL_INT - org.apache.drill.common.types.Types.OPTIONAL_INT

Example 1 with OPTIONAL_INT

Use of org.apache.drill.common.types.Types.OPTIONAL_INT in the Apache Drill project.

From the class DrillParquetReader, method setup.

/**
 * Prepares this reader for use.
 *
 * <p>Reads the Parquet schema from the file footer and works out the projection for the
 * requested columns. Every requested column that the projection lookup reports as not found
 * is registered on {@code output} as an {@code OPTIONAL_INT} field backed by a
 * {@link NullableIntVector}. Unless none of the requested columns were found, a record
 * materializer is created; the page store and record reader are then built only when there
 * are records to read.
 *
 * @param context operator context; stored on this reader and used to obtain the allocator
 * @param output  mutator that receives the null-filled fields and is handed to the materializer
 * @throws ExecutionSetupException declared by the signature; any exception raised during setup
 *         is passed to {@code handleAndRaise} and the result is thrown
 */
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    try {
        this.operatorContext = context;
        schema = footer.getFileMetaData().getSchema();

        final List<SchemaPath> missingColumns = new ArrayList<>(getColumns().size());

        // Default to the full file schema; narrowed below for non-star queries.
        MessageType projection = schema;
        if (!isStarQuery()) {
            final MessageType requested = getProjection(schema, getColumns(), missingColumns);
            if (requested != null) {
                projection = requested;
            }
            if (!missingColumns.isEmpty()) {
                nullFilledVectors = new ArrayList<>(missingColumns.size());
                for (SchemaPath missing : missingColumns) {
                    // The field is named by missing.toExpr() so that it does not show up inside the existing maps.
                    MaterializedField placeholder = MaterializedField.create(missing.toExpr(), OPTIONAL_INT);
                    nullFilledVectors.add(output.addField(placeholder, NullableIntVector.class));
                }
                noColumnsFound = missingColumns.size() == getColumns().size();
            }
        }
        logger.debug("Requesting schema {}", projection);

        if (!noColumnsFound) {
            // Columns absent from the schema were already added to the output above, so they are
            // left out of the set handed to DrillParquetRecordMaterializer.
            @SuppressWarnings("unchecked") Collection<SchemaPath> materializedColumns = missingColumns.isEmpty() ? getColumns() : CollectionUtils.subtract(getColumns(), missingColumns);
            recordMaterializer = new DrillParquetRecordMaterializer(output, projection, materializedColumns, fragmentContext.getOptions(), containsCorruptedDates);
        }

        final boolean readersNeeded = numRecordsToRead != 0 && !noColumnsFound;
        if (!readersNeeded) {
            // Nothing to read, so the readers are not initialised.
            return;
        }

        MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(projection, schema);

        // Index the row group's column chunks by path; on a duplicate path the later chunk wins.
        BlockMetaData rowGroup = footer.getBlocks().get(entry.getRowGroupIndex());
        List<ColumnChunkMetaData> chunks = rowGroup.getColumns();
        Map<ColumnPath, ColumnChunkMetaData> chunksByPath = chunks.stream()
                .collect(Collectors.toMap(ColumnChunkMetaData::getPath, Function.identity(), (earlier, later) -> later));

        BufferAllocator allocator = operatorContext.getAllocator();
        CompressionCodecFactory codecFactory = DrillCompressionCodecFactory.createDirectCodecFactory(
                drillFileSystem.getConf(), new ParquetDirectByteBufferAllocator(allocator), 0);
        pageReadStore = new ColumnChunkIncReadStore(numRecordsToRead, codecFactory, allocator, drillFileSystem, entry.getPath());

        // Register every primitive column of the file schema with the page store.
        for (String[] columnPath : schema.getPaths()) {
            Type columnType = schema.getType(columnPath);
            if (!columnType.isPrimitive()) {
                continue;
            }
            ColumnChunkMetaData chunk = chunksByPath.get(ColumnPath.get(columnPath));
            pageReadStore.addColumn(schema.getColumnDescription(columnPath), chunk);
        }

        recordReader = columnIO.getRecordReader(pageReadStore, recordMaterializer);
    } catch (Exception e) {
        throw handleAndRaise("Failure in setting up reader", e);
    }
}

Also used : Arrays(java.util.Arrays) BufferAllocator(org.apache.drill.exec.memory.BufferAllocator) ParquetDirectByteBufferAllocator(org.apache.drill.exec.store.parquet.ParquetDirectByteBufferAllocator) ParquetReaderUtility(org.apache.drill.exec.store.parquet.ParquetReaderUtility) ColumnIOFactory(org.apache.parquet.io.ColumnIOFactory) LoggerFactory(org.slf4j.LoggerFactory) OutputMutator(org.apache.drill.exec.physical.impl.OutputMutator) OperatorContext(org.apache.drill.exec.ops.OperatorContext) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) PathSegment(org.apache.drill.common.expression.PathSegment) Map(java.util.Map) RowGroupReadEntry(org.apache.drill.exec.store.parquet.RowGroupReadEntry) Types(org.apache.parquet.schema.Types) ValueVector(org.apache.drill.exec.vector.ValueVector) GroupType(org.apache.parquet.schema.GroupType) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Collectors(java.util.stream.Collectors) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) MessageType(org.apache.parquet.schema.MessageType) List(java.util.List) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) Type(org.apache.parquet.schema.Type) ExecConstants(org.apache.drill.exec.ExecConstants) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) MaterializedField(org.apache.drill.exec.record.MaterializedField) Function(java.util.function.Function) CommonParquetRecordReader(org.apache.drill.exec.store.CommonParquetRecordReader) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) AllocationHelper(org.apache.drill.exec.vector.AllocationHelper) 
CollectionUtils(org.apache.commons.collections.CollectionUtils) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) CompressionCodecFactory(org.apache.parquet.compression.CompressionCodecFactory) DrillCompressionCodecFactory(org.apache.drill.exec.store.parquet.compression.DrillCompressionCodecFactory) LinkedList(java.util.LinkedList) LinkedHashSet(java.util.LinkedHashSet) FragmentContext(org.apache.drill.exec.ops.FragmentContext) Logger(org.slf4j.Logger) IOException(java.io.IOException) ColumnChunkIncReadStore(org.apache.parquet.hadoop.ColumnChunkIncReadStore) StringJoiner(java.util.StringJoiner) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OPTIONAL_INT(org.apache.drill.common.types.Types.OPTIONAL_INT) RecordReader(org.apache.parquet.io.RecordReader) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetDirectByteBufferAllocator(org.apache.drill.exec.store.parquet.ParquetDirectByteBufferAllocator) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) ArrayList(java.util.ArrayList) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) IOException(java.io.IOException) ColumnIOFactory(org.apache.parquet.io.ColumnIOFactory) BufferAllocator(org.apache.drill.exec.memory.BufferAllocator) ParquetDirectByteBufferAllocator(org.apache.drill.exec.store.parquet.ParquetDirectByteBufferAllocator) NullableIntVector(org.apache.drill.exec.vector.NullableIntVector) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) CompressionCodecFactory(org.apache.parquet.compression.CompressionCodecFactory) 
DrillCompressionCodecFactory(org.apache.drill.exec.store.parquet.compression.DrillCompressionCodecFactory) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnChunkIncReadStore(org.apache.parquet.hadoop.ColumnChunkIncReadStore) MessageType(org.apache.parquet.schema.MessageType)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 LinkedHashSet (java.util.LinkedHashSet)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 StringJoiner (java.util.StringJoiner)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 CollectionUtils (org.apache.commons.collections.CollectionUtils)1 ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException)1 PathSegment (org.apache.drill.common.expression.PathSegment)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1 OPTIONAL_INT (org.apache.drill.common.types.Types.OPTIONAL_INT)1 ExecConstants (org.apache.drill.exec.ExecConstants)1 OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException)1 BufferAllocator (org.apache.drill.exec.memory.BufferAllocator)1