Example 1 with OutputMutator

Use of org.apache.drill.exec.physical.impl.OutputMutator in project drill by apache.

From the class TestScanBatchWriters, method sanityTest.
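
This test drives ScanBatch.Mutator by hand: it builds a VectorContainer, writes two records (a nullable INT "a", a nullable VARCHAR "b", and a repeated INT list "c") through a VectorContainerWriter, then wraps the container as a RowSet and verifies it against an expected row set.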

@Test
public void sanityTest() throws Exception {
    Scan scanConfig = new AbstractSubScan("bob") {

        @Override
        public String getOperatorType() {
            return "";
        }
    };
    OperatorContext opContext = fixture.newOperatorContext(scanConfig);
    // Setup: normally done by ScanBatch
    VectorContainer container = new VectorContainer(fixture.allocator());
    OutputMutator output = new ScanBatch.Mutator(opContext, fixture.allocator(), container);
    DrillBuf buffer = opContext.getManagedBuffer();
    try (VectorContainerWriter writer = new VectorContainerWriter(output)) {
        // Per-batch
        writer.allocate();
        writer.reset();
        BaseWriter.MapWriter map = writer.rootAsMap();
        // Write one record: (10, "Fred", [100, 110, 120])
        map.integer("a").writeInt(10);
        byte[] bytes = "Fred".getBytes("UTF-8");
        buffer.setBytes(0, bytes, 0, bytes.length);
        map.varChar("b").writeVarChar(0, bytes.length, buffer);
        try (ListWriter list = map.list("c")) {
            list.startList();
            list.integer().writeInt(100);
            list.integer().writeInt(110);
            list.integer().writeInt(120);
            list.endList();
            // Write another record: (20, "Wilma", [])
            writer.setPosition(1);
            map.integer("a").writeInt(20);
            bytes = "Wilma".getBytes("UTF-8");
            buffer.setBytes(0, bytes, 0, bytes.length);
            map.varChar("b").writeVarChar(0, bytes.length, buffer);
            writer.setValueCount(2);
            // Wrap-up done by ScanBatch
            container.setRecordCount(2);
            container.buildSchema(SelectionVectorMode.NONE);
            RowSet rowSet = fixture.wrap(container);
            // Expected
            TupleMetadata schema = new SchemaBuilder()
                .addNullable("a", MinorType.INT)
                .addNullable("b", MinorType.VARCHAR)
                .addArray("c", MinorType.INT)
                .buildSchema();
            RowSet expected = fixture.rowSetBuilder(schema)
                .addRow(10, "Fred", new int[] { 100, 110, 120 })
                .addRow(20, "Wilma", null)
                .build();
            new RowSetComparison(expected).verifyAndClearAll(rowSet);
        }
    } finally {
        opContext.close();
    }
}
Also used: AbstractSubScan (org.apache.drill.exec.physical.base.AbstractSubScan), VectorContainerWriter (org.apache.drill.exec.vector.complex.impl.VectorContainerWriter), BaseWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter), RowSet (org.apache.drill.exec.physical.rowSet.RowSet), OutputMutator (org.apache.drill.exec.physical.impl.OutputMutator), VectorContainer (org.apache.drill.exec.record.VectorContainer), RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison), OperatorContext (org.apache.drill.exec.ops.OperatorContext), ListWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), Scan (org.apache.drill.exec.physical.base.Scan), DrillBuf (io.netty.buffer.DrillBuf), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
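
The test above reaches the Mutator through Drill's complex-writer API. For a simple flat column a reader can also talk to the OutputMutator directly, as Example 2 does: request a typed vector with addField() and drive that vector's own mutator. Below is a minimal sketch of that pattern; the class FlatColumnSketch, the helper writeFlatColumn, and the column name "d" are illustrative, not part of Drill.

import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.NullableIntVector;

// Illustrative helper class (hypothetical): not part of Drill.
public class FlatColumnSketch {

    // Add a nullable INT column "d" via the OutputMutator and fill two rows.
    static void writeFlatColumn(OutputMutator output) throws SchemaChangeException {
        MajorType type = Types.optional(MinorType.INT);
        MaterializedField field = MaterializedField.create("d", type);
        // addField() registers the vector with the scan's output container
        // and hands back a typed vector for the reader to populate.
        NullableIntVector vector = output.addField(field, NullableIntVector.class);
        vector.allocateNew();
        NullableIntVector.Mutator mutator = vector.getMutator();
        mutator.setSafe(0, 10);   // row 0
        mutator.setSafe(1, 20);   // row 1
        mutator.setValueCount(2); // finalize the value count for this batch
    }
}

A reader built this way never owns its vectors: ScanBatch.Mutator tracks them in the container it was constructed with, which is why the test above can hand that same container to fixture.wrap() for verification.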

Example 2 with OutputMutator

Use of org.apache.drill.exec.physical.impl.OutputMutator in project drill by apache.

From the class DrillParquetReader, method setup.
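
Here the reader receives its OutputMutator in setup(). Projected columns that are missing from the Parquet file schema are registered through output.addField() as null-filled nullable-INT vectors; the columns that do exist are wired into a DrillParquetRecordMaterializer and per-column page readers, and reader initialization is skipped entirely when no records or no columns are requested.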

@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    try {
        this.operatorContext = context;
        schema = footer.getFileMetaData().getSchema();
        MessageType projection;
        final List<SchemaPath> columnsNotFound = new ArrayList<>(getColumns().size());
        if (isStarQuery()) {
            projection = schema;
        } else {
            projection = getProjection(schema, getColumns(), columnsNotFound);
            if (projection == null) {
                projection = schema;
            }
            if (!columnsNotFound.isEmpty()) {
                nullFilledVectors = new ArrayList<>(columnsNotFound.size());
                for (SchemaPath col : columnsNotFound) {
                    // col.toExpr() is used as the field name here since we don't want these fields to appear inside existing maps
                    nullFilledVectors.add(output.addField(MaterializedField.create(col.toExpr(), OPTIONAL_INT), NullableIntVector.class));
                }
                noColumnsFound = columnsNotFound.size() == getColumns().size();
            }
        }
        logger.debug("Requesting schema {}", projection);
        if (!noColumnsFound) {
            // Discard columns not found in the schema when creating the DrillParquetRecordMaterializer, since they have already been added to the output.
            @SuppressWarnings("unchecked")
            Collection<SchemaPath> columns = columnsNotFound.isEmpty()
                ? getColumns()
                : CollectionUtils.subtract(getColumns(), columnsNotFound);
            recordMaterializer = new DrillParquetRecordMaterializer(output, projection, columns, fragmentContext.getOptions(), containsCorruptedDates);
        }
        if (numRecordsToRead == 0 || noColumnsFound) {
            // no need to init readers
            return;
        }
        ColumnIOFactory factory = new ColumnIOFactory(false);
        MessageColumnIO columnIO = factory.getColumnIO(projection, schema);
        BlockMetaData blockMetaData = footer.getBlocks().get(entry.getRowGroupIndex());
        Map<ColumnPath, ColumnChunkMetaData> paths = blockMetaData.getColumns().stream()
            .collect(Collectors.toMap(ColumnChunkMetaData::getPath, Function.identity(), (o, n) -> n));
        BufferAllocator allocator = operatorContext.getAllocator();
        CompressionCodecFactory ccf = DrillCompressionCodecFactory.createDirectCodecFactory(
            drillFileSystem.getConf(), new ParquetDirectByteBufferAllocator(allocator), 0);
        pageReadStore = new ColumnChunkIncReadStore(numRecordsToRead, ccf, allocator, drillFileSystem, entry.getPath());
        for (String[] path : schema.getPaths()) {
            Type type = schema.getType(path);
            if (type.isPrimitive()) {
                ColumnChunkMetaData md = paths.get(ColumnPath.get(path));
                pageReadStore.addColumn(schema.getColumnDescription(path), md);
            }
        }
        recordReader = columnIO.getRecordReader(pageReadStore, recordMaterializer);
    } catch (Exception e) {
        throw handleAndRaise("Failure in setting up reader", e);
    }
}
Also used: Arrays (java.util.Arrays), BufferAllocator (org.apache.drill.exec.memory.BufferAllocator), ParquetDirectByteBufferAllocator (org.apache.drill.exec.store.parquet.ParquetDirectByteBufferAllocator), ParquetReaderUtility (org.apache.drill.exec.store.parquet.ParquetReaderUtility), ColumnIOFactory (org.apache.parquet.io.ColumnIOFactory), LoggerFactory (org.slf4j.LoggerFactory), OutputMutator (org.apache.drill.exec.physical.impl.OutputMutator), OperatorContext (org.apache.drill.exec.ops.OperatorContext), DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem), PathSegment (org.apache.drill.common.expression.PathSegment), Map (java.util.Map), RowGroupReadEntry (org.apache.drill.exec.store.parquet.RowGroupReadEntry), Types (org.apache.parquet.schema.Types), ValueVector (org.apache.drill.exec.vector.ValueVector), GroupType (org.apache.parquet.schema.GroupType), Collection (java.util.Collection), SchemaPath (org.apache.drill.common.expression.SchemaPath), Set (java.util.Set), Collectors (java.util.stream.Collectors), ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData), MessageType (org.apache.parquet.schema.MessageType), List (java.util.List), ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor), BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData), Preconditions (org.apache.drill.shaded.guava.com.google.common.base.Preconditions), Type (org.apache.parquet.schema.Type), ExecConstants (org.apache.drill.exec.ExecConstants), MessageColumnIO (org.apache.parquet.io.MessageColumnIO), ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath), NullableIntVector (org.apache.drill.exec.vector.NullableIntVector), MaterializedField (org.apache.drill.exec.record.MaterializedField), Function (java.util.function.Function), CommonParquetRecordReader (org.apache.drill.exec.store.CommonParquetRecordReader), ArrayList (java.util.ArrayList), OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException), AllocationHelper (org.apache.drill.exec.vector.AllocationHelper), CollectionUtils (org.apache.commons.collections.CollectionUtils), ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException), CompressionCodecFactory (org.apache.parquet.compression.CompressionCodecFactory), DrillCompressionCodecFactory (org.apache.drill.exec.store.parquet.compression.DrillCompressionCodecFactory), LinkedList (java.util.LinkedList), LinkedHashSet (java.util.LinkedHashSet), FragmentContext (org.apache.drill.exec.ops.FragmentContext), Logger (org.slf4j.Logger), IOException (java.io.IOException), ColumnChunkIncReadStore (org.apache.parquet.hadoop.ColumnChunkIncReadStore), StringJoiner (java.util.StringJoiner), ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata), OPTIONAL_INT (org.apache.drill.common.types.Types.OPTIONAL_INT), RecordReader (org.apache.parquet.io.RecordReader)
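
Taken together, the contract setup() must honor is small: accept the OperatorContext and OutputMutator, register output vectors once, and leave row production to next(). A stripped-down reader built on the same AbstractRecordReader base (which DrillParquetReader inherits via CommonParquetRecordReader) might look like the sketch below; the class name SketchReader, the column "id", and the written value are hypothetical, and only the setup()/next()/close() lifecycle comes from Drill's RecordReader contract.

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.vector.NullableIntVector;

// Illustrative skeleton (hypothetical names): not Drill source.
public class SketchReader extends AbstractRecordReader {

    private NullableIntVector idVector;
    private boolean delivered;

    @Override
    public void setup(OperatorContext context, OutputMutator output)
            throws ExecutionSetupException {
        try {
            // Register output vectors up front; the scan operator owns them.
            MaterializedField field =
                MaterializedField.create("id", Types.optional(MinorType.INT));
            idVector = output.addField(field, NullableIntVector.class);
        } catch (SchemaChangeException e) {
            throw new ExecutionSetupException("Failed to register vectors", e);
        }
    }

    @Override
    public int next() {
        if (delivered) {
            return 0; // no more rows: signals end of stream to the scan
        }
        // Emit a single one-row batch; a real reader loops over its input.
        idVector.allocateNew();
        idVector.getMutator().setSafe(0, 42);
        idVector.getMutator().setValueCount(1);
        delivered = true;
        return 1; // row count for this batch
    }

    @Override
    public void close() { }
}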
