Search in sources :

Example 81 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class MockRecordReader method next.

/**
 * Generates the next batch of test records into every value vector.
 *
 * @return the number of records generated for this batch, or 0 once the
 *         configured number of records has already been produced
 */
@Override
public int next() {
    // Nothing left to produce once the configured total has been reached.
    if (this.config.getRecords() <= recordsRead) {
        return 0;
    }
    // Cap the batch at whatever is still outstanding.
    final int remaining = this.config.getRecords() - recordsRead;
    final int batchSize = Math.min(batchRecordCount, remaining);
    recordsRead += batchSize;
    for (final ValueVector vector : valueVectors) {
        vector.getMutator().generateTestData(batchSize);
    }
    return batchSize;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector)

Example 82 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
 * Reads every batch and appends its values to per-column result lists, optionally checking each
 * batch's schema against an expected schema while iterating.
 *
 * <p>Result keys are the simple-path expressions of the fields. Values of type {@code Text} are
 * stored as their {@code toString()} form; all other values (including {@code null}) are stored
 * exactly as returned by the vector accessor.
 *
 * @param batches the batches to read
 * @param expectedSchema the schema every batch is expected to have, or {@code null} to skip the check
 * @return a sorted map from column expression to the values read for that column
 * @throws SchemaChangeException if a batch's schema does not equal {@code expectedSchema}
 * @throws UnsupportedEncodingException declared in the signature; not thrown directly by the code in this method
 */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    Map<String, List<Object>> columns = new TreeMap<>();
    long rowsSoFar = 0;
    BatchSchema currentSchema = null;
    for (VectorAccessible batch : batches) {
        if (expectedSchema != null && !expectedSchema.equals(batch.getSchema())) {
            throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", batch.getSchema(), expectedSchema));
        }
        // The schema is refreshed for every batch so that the selection vector mode below is that of
        // the current batch, but the result columns are only registered once, from the first batch.
        // TODO - actually handle schema changes: a field that first shows up in a later batch has no
        // result list here. Need to add new vectors and null fill for previous batches? Is the
        // distinction between null and non-existence important?
        final boolean firstBatch = currentSchema == null;
        currentSchema = batch.getSchema();
        if (firstBatch) {
            for (MaterializedField column : currentSchema) {
                columns.put(SchemaPath.getSimplePath(column.getPath()).toExpr(), new ArrayList<Object>());
            }
        }
        logger.debug("reading batch with " + batch.getRecordCount() + " rows, total read so far " + rowsSoFar);
        rowsSoFar += batch.getRecordCount();
        for (VectorWrapper<?> wrapper : batch) {
            String columnName = SchemaPath.getSimplePath(wrapper.getField().getPath()).toExpr();
            ValueVector[] sources;
            if (wrapper.isHyper()) {
                sources = wrapper.getValueVectors();
            } else {
                sources = new ValueVector[] { wrapper.getValueVector() };
            }
            // At most one of the two selection vectors is fetched, depending on the batch's mode.
            SelectionVector2 twoByteSelection = null;
            SelectionVector4 fourByteSelection = null;
            switch(currentSchema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    twoByteSelection = batch.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    fourByteSelection = batch.getSelectionVector4();
                    break;
            }
            if (fourByteSelection == null) {
                for (ValueVector source : sources) {
                    for (int row = 0; row < batch.getRecordCount(); row++) {
                        // Without a two-byte selection vector the row position is used directly.
                        final int position = (twoByteSelection != null) ? twoByteSelection.getIndex(row) : row;
                        Object value = source.getAccessor().getObject(position);
                        // instanceof is false for null, so no separate null check is needed.
                        if (value instanceof Text) {
                            value = value.toString();
                        }
                        columns.get(columnName).add(value);
                    }
                }
            } else {
                for (int row = 0; row < fourByteSelection.getCount(); row++) {
                    // Each four-byte entry packs the vector index in the upper 16 bits and the
                    // record index within that vector in the lower 16 bits.
                    final int packed = fourByteSelection.get(row);
                    final int vectorIndex = packed >> 16;
                    final int recordIndex = packed & 0xFFFF;
                    Object value = sources[vectorIndex].getAccessor().getObject(recordIndex);
                    if (value instanceof Text) {
                        value = value.toString();
                    }
                    columns.get(columnName).add(value);
                }
            }
        }
    }
    return columns;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) ArrayList(java.util.ArrayList) List(java.util.List) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 83 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class DrillTestWrapper method addToHyperVectorMap.

/**
 * Loads each query batch through the given loader and groups the resulting value vectors by
 * field into hyper vectors.
 *
 * <p>The first vector seen for a field creates a new {@code HyperVectorValueIterator}; vectors
 * for the same field in later batches are appended to that iterator's hyper vector. After all
 * batches are processed, {@code determineTotalSize()} is called on every iterator.
 *
 * @param records the query batches to load, in order
 * @param loader the loader used to materialize each batch
 * @return a sorted map from field expression to the iterator over that field's combined vectors
 * @throws SchemaChangeException declared in the signature; presumably raised by the loader - confirm
 * @throws UnsupportedEncodingException declared in the signature; not thrown directly by the code in this method
 */
private Map<String, HyperVectorValueIterator> addToHyperVectorMap(final List<QueryDataBatch> records, final RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    final Map<String, HyperVectorValueIterator> iteratorsByField = new TreeMap<>();
    long rowsSoFar = 0;
    for (final QueryDataBatch queryBatch : records) {
        loader.load(queryBatch.getHeader().getDef(), queryBatch.getData());
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + rowsSoFar);
        rowsSoFar += loader.getRecordCount();
        for (VectorWrapper<?> wrapper : loader) {
            final String fieldName = SchemaPath.getSimplePath(wrapper.getField().getPath()).toExpr();
            final HyperVectorValueIterator known = iteratorsByField.get(fieldName);
            if (known != null) {
                // Field already seen in an earlier batch: extend its hyper vector.
                known.getHyperVector().addVector(wrapper.getValueVector());
                continue;
            }
            // First occurrence of this field: seed a one-element array of the field's value class.
            final MaterializedField materialized = wrapper.getField();
            final ValueVector[] seed = (ValueVector[]) Array.newInstance(materialized.getValueClass(), 1);
            seed[0] = wrapper.getValueVector();
            iteratorsByField.put(fieldName, new HyperVectorValueIterator(materialized, new HyperVectorWrapper<>(materialized, seed)));
        }
    }
    for (HyperVectorValueIterator iterator : iteratorsByField.values()) {
        iterator.determineTotalSize();
    }
    return iteratorsByField;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) MaterializedField(org.apache.drill.exec.record.MaterializedField) HyperVectorWrapper(org.apache.drill.exec.record.HyperVectorWrapper) TreeMap(java.util.TreeMap) HyperVectorValueIterator(org.apache.drill.exec.HyperVectorValueIterator)

Example 84 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class MergingRecordBatch method buildSchema.

/**
 * Builds the outgoing schema from the first fragment provider that delivers a batch with at
 * least one field, keeping that batch in {@code tempBatchHolder} for later use.
 *
 * <p>If every provider is exhausted without such a batch, the state becomes {@code DONE} and the
 * method returns without touching the outgoing container. If a provider returns no batch at all,
 * the state becomes {@code STOP} or {@code DONE} depending on {@code context.shouldContinue()},
 * and the outgoing container is still canonicalized and its schema built.
 *
 * @throws SchemaChangeException declared in the signature
 * @throws DrillRuntimeException if fetching a batch fails with an {@code IOException}
 */
@Override
public void buildSchema() throws SchemaChangeException {
    tempBatchHolder = new RawFragmentBatch[fragProviders.length];
    try {
        int provider = 0;
        for (;;) {
            if (provider >= fragProviders.length) {
                // No provider produced a batch with fields.
                state = BatchState.DONE;
                return;
            }
            final RawFragmentBatch candidate = getNext(provider);
            if (candidate == null) {
                state = context.shouldContinue() ? BatchState.DONE : BatchState.STOP;
                break;
            }
            if (candidate.getHeader().getDef().getFieldCount() != 0) {
                // Found a usable batch: remember it and allocate one outgoing vector per field.
                tempBatchHolder[provider] = candidate;
                for (final SerializedField serialized : candidate.getHeader().getDef().getFieldList()) {
                    @SuppressWarnings("resource") final ValueVector vector = outgoingContainer.addOrGet(MaterializedField.create(serialized));
                    vector.allocateNew();
                }
                break;
            }
            // Batch without fields: try the next provider.
            provider++;
        }
    } catch (final IOException e) {
        throw new DrillRuntimeException(e);
    }
    outgoingContainer = VectorContainer.canonicalize(outgoingContainer);
    outgoingContainer.buildSchema(SelectionVectorMode.NONE);
}
Also used : RawFragmentBatch(org.apache.drill.exec.record.RawFragmentBatch) ValueVector(org.apache.drill.exec.vector.ValueVector) SerializedField(org.apache.drill.exec.proto.UserBitShared.SerializedField) IOException(java.io.IOException) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint)

Example 85 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

the class HashAggBatch method createAggregatorInternal.

/**
 * Generates, instantiates and sets up the {@code HashAggregator} for this batch.
 *
 * <p>The method clears the output container, materializes the configured group-by and aggregate
 * expressions against {@code incoming}, adds one output vector per materialized expression,
 * builds the container schema, obtains the generated implementation class and finally calls
 * {@code setup} on it.
 *
 * @return the generated aggregator, already set up
 * @throws SchemaChangeException if the error collector reports errors while the aggregate
 *         expressions are being processed
 * @throws ClassTransformationException declared in the signature; presumably raised while
 *         obtaining the generated implementation class - confirm
 * @throws IOException declared in the signature; presumably raised while obtaining the
 *         generated implementation class - confirm
 */
private HashAggregator createAggregatorInternal() throws SchemaChangeException, ClassTransformationException, IOException {
    CodeGenerator<HashAggregator> top = CodeGenerator.get(HashAggregator.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    ClassGenerator<HashAggregator> cg = top.getRoot();
    // Generator for the inner class named "BatchHolder".
    ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
    top.plainJavaCapable(true);
    // Uncomment out this line to debug the generated code.
    //    top.saveCodeForDebugging(true);
    container.clear();
    // A null expression list is treated the same as an empty one.
    int numGroupByExprs = (popConfig.getGroupByExprs() != null) ? popConfig.getGroupByExprs().size() : 0;
    int numAggrExprs = (popConfig.getAggrExprs() != null) ? popConfig.getAggrExprs().size() : 0;
    aggrExprs = new LogicalExpression[numAggrExprs];
    groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
    aggrOutFieldIds = new TypedFieldId[numAggrExprs];
    // A single collector is shared by the group-by and aggregate materialization below.
    ErrorCollector collector = new ErrorCollectorImpl();
    int i;
    // Materialize each group-by expression and add an output vector for it.
    for (i = 0; i < numGroupByExprs; i++) {
        NamedExpression ne = popConfig.getGroupByExprs().get(i);
        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
        if (expr == null) {
            // Skipping leaves groupByOutFieldIds[i] unset (null).
            continue;
        }
        final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
        @SuppressWarnings("resource") ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
        // add this group-by vector to the output container
        groupByOutFieldIds[i] = container.add(vv);
    }
    // Materialize each aggregate expression, add its output vector and wrap it in a write expression.
    for (i = 0; i < numAggrExprs; i++) {
        NamedExpression ne = popConfig.getAggrExprs().get(i);
        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
        if (expr instanceof IfExpression) {
            throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
        }
        // NOTE(review): the collector is only inspected inside this loop, so errors collected
        // during group-by materialization surface here, and only if there is at least one
        // aggregate expression - confirm this is intended.
        if (collector.hasErrors()) {
            throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString());
        }
        if (expr == null) {
            // Skipping leaves aggrOutFieldIds[i] and aggrExprs[i] unset (null).
            continue;
        }
        final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
        @SuppressWarnings("resource") ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
        aggrOutFieldIds[i] = container.add(vv);
        aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
    }
    // The remaining code-generation steps run after aggrExprs has been populated above.
    setupUpdateAggrValues(cgInner);
    setupGetIndex(cg);
    // The generated "resetValues" block simply returns true.
    cg.getBlock("resetValues")._return(JExpr.TRUE);
    container.buildSchema(SelectionVectorMode.NONE);
    HashAggregator agg = context.getImplementationClass(top);
    HashTableConfig htConfig = // TODO - fix the validator on this option
    new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, popConfig.getGroupByExprs(), null, /* no probe exprs */
    comparators);
    agg.setup(popConfig, htConfig, context, this.stats, oContext.getAllocator(), incoming, this, aggrExprs, cgInner.getWorkspaceTypes(), groupByOutFieldIds, this.container);
    return agg;
}
Also used : IfExpression(org.apache.drill.common.expression.IfExpression) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) MaterializedField(org.apache.drill.exec.record.MaterializedField) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) ValueVector(org.apache.drill.exec.vector.ValueVector) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) HashTableConfig(org.apache.drill.exec.physical.impl.common.HashTableConfig) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ValueVectorWriteExpression(org.apache.drill.exec.expr.ValueVectorWriteExpression)

Aggregations

ValueVector (org.apache.drill.exec.vector.ValueVector)130 MaterializedField (org.apache.drill.exec.record.MaterializedField)29 Test (org.junit.Test)21 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)18 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)13 VectorWrapper (org.apache.drill.exec.record.VectorWrapper)13 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)12 ExecTest (org.apache.drill.exec.ExecTest)11 IOException (java.io.IOException)10 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)10 VectorContainer (org.apache.drill.exec.record.VectorContainer)10 Drillbit (org.apache.drill.exec.server.Drillbit)10 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)9 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)9 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)9 SchemaPath (org.apache.drill.common.expression.SchemaPath)8 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)8 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)8 FragmentContext (org.apache.drill.exec.ops.FragmentContext)8 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)8