Search in sources :

Example 41 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

The build method of the class SortRecordBatchBuilder.

/**
 * Publishes the collected batches through {@code outputContainer} as hyper vectors and fills
 * the four-byte selection vector ({@code sv4}) with one (batch id, record offset) entry per record.
 *
 * <p>Only the selection vector modes {@code NONE} and {@code TWO_BYTE} are handled. For
 * {@code TWO_BYTE} input the per-batch two-byte selection vector is cleared once its indices
 * have been copied into {@code sv4}.
 *
 * @param context         not used by this method
 * @param outputContainer container that is cleared and then repopulated with one hyper list per
 *                        schema field, with its schema built in {@code FOUR_BYTE} mode
 * @throws SchemaChangeException         if more than one schema was collected, or more than
 *                                       {@code Character.MAX_VALUE} batches are held
 * @throws OutOfMemoryError              if the reserved buffer for {@code sv4} could not be allocated
 * @throws UnsupportedOperationException if the schema uses any other selection vector mode
 */
public void build(FragmentContext context, VectorContainer outputContainer) throws SchemaChangeException {
    outputContainer.clear();

    // Validate what has been collected before any memory is taken from the reservation.
    if (batches.keySet().size() > 1) {
        throw new SchemaChangeException("Sort currently only supports a single schema.");
    }
    if (batches.size() > Character.MAX_VALUE) {
        throw new SchemaChangeException("Sort cannot work on more than %d batches at a time.", (int) Character.MAX_VALUE);
    }
    assert batches.keys().size() >= 1 : "Invalid to have an empty set of batches with no schemas.";

    final DrillBuf selectionBuffer = reservation.allocateBuffer();
    if (selectionBuffer == null) {
        throw new OutOfMemoryError("Failed to allocate direct memory for SV4 vector in SortRecordBatchBuilder.");
    }
    sv4 = new SelectionVector4(selectionBuffer, recordCount, Character.MAX_VALUE);

    final BatchSchema soleSchema = batches.keySet().iterator().next();
    final List<RecordBatchData> schemaBatches = batches.get(soleSchema);

    // Generate the sv4 pointers: sv4Slot is the running position in sv4,
    // batchOrdinal identifies the batch the record lives in.
    int sv4Slot = 0;
    int batchOrdinal = 0;
    switch (soleSchema.getSelectionVectorMode()) {
        case NONE:
            // No selection vector on the input: records are addressed by their plain offset.
            for (final RecordBatchData batchData : schemaBatches) {
                for (int row = 0; row < batchData.getRecordCount(); row++) {
                    sv4.set(sv4Slot, batchOrdinal, row);
                    sv4Slot++;
                }
                batchOrdinal++;
            }
            break;
        case TWO_BYTE:
            // Input carries a two-byte selection vector: copy the index it selects for each position.
            for (final RecordBatchData batchData : schemaBatches) {
                for (int row = 0; row < batchData.getRecordCount(); row++) {
                    sv4.set(sv4Slot, batchOrdinal, (int) batchData.getSv2().getIndex(row));
                    sv4Slot++;
                }
                // The two-byte selection vector is no longer needed once copied, so drop it.
                batchData.getSv2().clear();
                batchOrdinal++;
            }
            break;
        default:
            throw new UnsupportedOperationException();
    }

    // Group every vector of every batch under the field it belongs to.
    final ArrayListMultimap<MaterializedField, ValueVector> vectorsByField = ArrayListMultimap.create();
    for (final RecordBatchData batchData : batches.values()) {
        for (final ValueVector vector : batchData.getVectors()) {
            vectorsByField.put(vector.getField(), vector);
        }
    }

    // Expose one hyper list per schema field, in schema order.
    for (final MaterializedField field : soleSchema) {
        outputContainer.addHyperList(vectorsByField.get(field), false);
    }
    outputContainer.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) MaterializedField(org.apache.drill.exec.record.MaterializedField) DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 42 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

The load method of the class ProducerConsumerBatch.

/**
 * Moves the contents of {@code batch} into this operator's {@code container}.
 *
 * <p>When the incoming schema equals the schema already held, the existing vectors are zeroed
 * and the incoming vectors are transferred into them column by column. Otherwise the container
 * is cleared, rebuilt from the incoming vectors, and the remembered schema is replaced.
 *
 * @param batch the batch whose container supplies the vectors
 * @return {@code true} if the container schema was rebuilt, {@code false} if the existing
 *         schema was kept and data was transferred into the existing vectors
 */
private boolean load(final RecordBatchData batch) {
    final VectorContainer incoming = batch.getContainer();

    // First batch, or the schema differs: rebuild the container around the incoming vectors.
    if (schema == null || !incoming.getSchema().equals(schema)) {
        container.clear();
        for (final VectorWrapper<?> wrapper : incoming) {
            container.add(wrapper.getValueVector());
        }
        container.buildSchema(SelectionVectorMode.NONE);
        schema = container.getSchema();
        return true;
    }

    // Same schema: reuse the existing vectors and transfer the new data into them.
    container.zeroVectors();
    final BatchSchema currentSchema = container.getSchema();
    for (int column = 0; column < container.getNumberOfColumns(); column++) {
        final MaterializedField field = currentSchema.getColumn(column);
        final MajorType type = field.getType();
        final Class<?> vectorClass = TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode());
        final SchemaPath columnPath = SchemaPath.getSimplePath(field.getPath());

        // Look the column up by path in both containers; the positions are resolved independently.
        final ValueVector target = container
                .getValueAccessorById(vectorClass, container.getValueVectorId(columnPath).getFieldIds())
                .getValueVector();
        final ValueVector source = incoming
                .getValueAccessorById(vectorClass, incoming.getValueVectorId(columnPath).getFieldIds())
                .getValueVector();

        source.makeTransferPair(target).transfer();
    }
    return false;
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) TransferPair(org.apache.drill.exec.record.TransferPair) BatchSchema(org.apache.drill.exec.record.BatchSchema) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 43 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

The classifyExpr method of the class ProjectRecordBatch.

/**
 * Classifies a project expression by comparing the root path of its input expression with the
 * root path of its output reference, and records the resulting output column names in
 * {@code result}.
 *
 * <p>The classification looks at whether each path contains the prefix delimiter
 * ({@code StarColumnHelper.PREFIX_DELIMITER}) and/or the star column
 * ({@code StarColumnHelper.STAR_COLUMN}). Depending on the combination, {@code result.outputNames}
 * is either built by appending names through {@code addToResultMaps}, or pre-filled with one
 * {@code EMPTY_STRING} per incoming field and then overwritten at the position of each matching
 * incoming column.
 *
 * <p>Side effects on {@code result}: {@code prefix} is set when the expression has a prefix;
 * {@code isStar} is set and the counter in {@code prefixMap} for the expression prefix is
 * incremented when the expression contains the star column; {@code outputNames} and
 * {@code outputMap} are updated by the branch that applies.
 *
 * @param ex       the named expression; its expression is cast to {@code SchemaPath}
 * @param incoming the incoming batch whose schema and vectors supply the candidate column names
 * @param result   the classifier state that is read and updated
 */
private void classifyExpr(final NamedExpression ex, final RecordBatch incoming, final ClassifierResult result) {
    final NameSegment expr = ((SchemaPath) ex.getExpr()).getRootSegment();
    final NameSegment ref = ex.getRef().getRootSegment();
    // Flags describing the shape of the input (expr) and output (ref) root paths.
    final boolean exprHasPrefix = expr.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
    final boolean refHasPrefix = ref.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
    final boolean exprIsStar = expr.getPath().equals(StarColumnHelper.STAR_COLUMN);
    final boolean refContainsStar = ref.getPath().contains(StarColumnHelper.STAR_COLUMN);
    final boolean exprContainsStar = expr.getPath().contains(StarColumnHelper.STAR_COLUMN);
    final boolean refEndsWithStar = ref.getPath().endsWith(StarColumnHelper.STAR_COLUMN);
    String exprPrefix = EMPTY_STRING;
    String exprSuffix = expr.getPath();
    if (exprHasPrefix) {
        // get the prefix of the expr
        // (split at the first delimiter only, so the suffix keeps any later delimiters)
        final String[] exprComponents = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (exprComponents.length == 2);
        exprPrefix = exprComponents[0];
        exprSuffix = exprComponents[1];
        result.prefix = exprPrefix;
    }
    boolean exprIsFirstWildcard = false;
    if (exprContainsStar) {
        result.isStar = true;
        // Count how many star expressions have been seen for this prefix; the first one is
        // flagged so that the "input and output are the same" branch below can apply to it.
        final Integer value = (Integer) result.prefixMap.get(exprPrefix);
        if (value == null) {
            final Integer n = 1;
            result.prefixMap.put(exprPrefix, n);
            exprIsFirstWildcard = true;
        } else {
            final Integer n = value + 1;
            result.prefixMap.put(exprPrefix, n);
        }
    }
    final int incomingSchemaSize = incoming.getSchema().getFieldCount();
    // input is '*' and output is 'prefix_*'
    if (exprIsStar && refHasPrefix && refEndsWithStar) {
        final String[] components = ref.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (components.length == 2);
        final String prefix = components[0];
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String name = vvIn.getField().getPath();
            // add the prefix to the incoming column name
            final String newName = prefix + StarColumnHelper.PREFIX_DELIMITER + name;
            addToResultMaps(newName, result, false);
        }
    } else // input and output are the same
    if (expr.getPath().equalsIgnoreCase(ref.getPath()) && (!exprContainsStar || exprIsFirstWildcard)) {
        if (exprContainsStar && exprHasPrefix) {
            assert exprPrefix != null;
            // k tracks the position of the current incoming column in outputNames
            int k = 0;
            result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
            for (int j = 0; j < incomingSchemaSize; j++) {
                // initialize
                result.outputNames.add(EMPTY_STRING);
            }
            for (final VectorWrapper<?> wrapper : incoming) {
                final ValueVector vvIn = wrapper.getValueVector();
                final String incomingName = vvIn.getField().getPath();
                // get the prefix of the name
                final String[] nameComponents = incomingName.split(StarColumnHelper.PREFIX_DELIMITER, 2);
                // if incoming valuevector does not have a prefix, ignore it since this expression is not referencing it
                if (nameComponents.length <= 1) {
                    k++;
                    continue;
                }
                final String namePrefix = nameComponents[0];
                if (exprPrefix.equalsIgnoreCase(namePrefix)) {
                    final String newName = incomingName;
                    // only the first occurrence of a name is recorded; later duplicates keep EMPTY_STRING
                    if (!result.outputMap.containsKey(newName)) {
                        result.outputNames.set(k, newName);
                        result.outputMap.put(newName, newName);
                    }
                }
                k++;
            }
        } else {
            result.outputNames = Lists.newArrayList();
            if (exprContainsStar) {
                for (final VectorWrapper<?> wrapper : incoming) {
                    final ValueVector vvIn = wrapper.getValueVector();
                    final String incomingName = vvIn.getField().getPath();
                    if (refContainsStar) {
                        // allow dups since this is likely top-level project
                        addToResultMaps(incomingName, result, true);
                    } else {
                        addToResultMaps(incomingName, result, false);
                    }
                }
            } else {
                final String newName = expr.getPath();
                if (!refHasPrefix && !exprHasPrefix) {
                    // allow dups since this is likely top-level project
                    addToResultMaps(newName, result, true);
                } else {
                    addToResultMaps(newName, result, false);
                }
            }
        }
    } else // input is wildcard and it is not the first wildcard
    if (exprIsStar) {
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String incomingName = vvIn.getField().getPath();
            // allow dups since this is likely top-level project
            addToResultMaps(incomingName, result, true);
        }
    } else // only the output has prefix
    if (!exprHasPrefix && refHasPrefix) {
        result.outputNames = Lists.newArrayList();
        final String newName = ref.getPath();
        addToResultMaps(newName, result, false);
    } else // input has prefix but output does not
    if (exprHasPrefix && !refHasPrefix) {
        // k tracks the position of the current incoming column in outputNames
        int k = 0;
        result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
        for (int j = 0; j < incomingSchemaSize; j++) {
            // initialize
            result.outputNames.add(EMPTY_STRING);
        }
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String name = vvIn.getField().getPath();
            final String[] components = name.split(StarColumnHelper.PREFIX_DELIMITER, 2);
            // incoming columns without a prefix cannot match a prefixed expression; skip them
            if (components.length <= 1) {
                k++;
                continue;
            }
            final String namePrefix = components[0];
            final String nameSuffix = components[1];
            if (exprPrefix.equalsIgnoreCase(namePrefix)) {
                // case insensitive matching of prefix.
                if (refContainsStar) {
                    // remove the prefix from the incoming column names
                    // for top level we need to make names unique
                    final String newName = getUniqueName(nameSuffix, result);
                    result.outputNames.set(k, newName);
                } else if (exprSuffix.equalsIgnoreCase(nameSuffix)) {
                    // case insensitive matching of field name.
                    // example: ref: $f1, expr: T0<PREFIX><column_name>
                    final String newName = ref.getPath();
                    result.outputNames.set(k, newName);
                }
            } else {
                // NOTE(review): outputNames was already pre-filled with one EMPTY_STRING per incoming
                // field above, so this appends an extra entry past incomingSchemaSize for every
                // prefixed column with a different prefix. Looks unintended; confirm that consumers
                // only read the first incomingSchemaSize entries before changing it.
                result.outputNames.add(EMPTY_STRING);
            }
            k++;
        }
    } else // input and output have prefixes although they could be different...
    if (exprHasPrefix && refHasPrefix) {
        // NOTE(review): this branch only asserts. With assertions disabled it falls through
        // silently and does not assign result.outputNames.
        final String[] input = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (input.length == 2);
        // not handled yet
        assert false : "Unexpected project expression or reference";
    } else {
        // if the incoming schema's column name matches the expression name of the Project,
        // then we just want to pick the ref name as the output column name
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String incomingName = vvIn.getField().getPath();
            if (expr.getPath().equalsIgnoreCase(incomingName)) {
                // case insensitive matching of field name.
                final String newName = ref.getPath();
                addToResultMaps(newName, result, true);
            }
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) NameSegment(org.apache.drill.common.expression.PathSegment.NameSegment) SchemaPath(org.apache.drill.common.expression.SchemaPath) VectorWrapper(org.apache.drill.exec.record.VectorWrapper)

Example 44 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

The doTest method of the class ExpressionInterpreterTest.

/**
 * Evaluates {@code expressionStr} with the expression interpreter against a mock scan batch and
 * verifies the first two values of the resulting vector.
 *
 * <p>Cleanup (clearing the result vector, closing the batch and its context, closing the
 * Drillbit) runs in {@code finally} blocks so that a failed assertion or evaluation error does
 * not leave those resources open. On success the cleanup order is: vector, batch, batch context,
 * Drillbit.
 *
 * @param expressionStr        the expression to evaluate
 * @param colNames             names of the mock columns; must have the same length as {@code colTypes}
 * @param colTypes             major types of the mock columns
 * @param expectFirstTwoValues the expected first two output values; must have length 2
 * @param planFragment         plan fragment used to create the mock scan batch
 * @throws Exception if setup, evaluation or cleanup fails
 */
protected void doTest(String expressionStr, String[] colNames, TypeProtos.MajorType[] colTypes, String[] expectFirstTwoValues, BitControl.PlanFragment planFragment) throws Exception {
    @SuppressWarnings("resource") final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
    @SuppressWarnings("resource") final Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
    try {
        bit1.run();
        // Create a mock scan batch as input for evaluation.
        assertEquals(colNames.length, colTypes.length);
        final MockTableDef.MockColumn[] columns = new MockTableDef.MockColumn[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            columns[i] = new MockTableDef.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(), 0, 0, 0, null, null, null);
        }
        final MockTableDef.MockScanEntry entry = new MockTableDef.MockScanEntry(10, false, 0, 1, columns);
        final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", false, java.util.Collections.singletonList(entry));
        @SuppressWarnings("resource") final ScanBatch batch = createMockScanBatch(bit1, scanPOP, planFragment);
        try {
            batch.next();
            @SuppressWarnings("resource") final ValueVector vv = evalExprWithInterpreter(expressionStr, batch, bit1);
            try {
                // Verify the first 2 values in the output of evaluation.
                assertEquals(2, expectFirstTwoValues.length);
                assertEquals(expectFirstTwoValues[0], getValueFromVector(vv, 0));
                assertEquals(expectFirstTwoValues[1], getValueFromVector(vv, 1));
                showValueVectorContent(vv);
            } finally {
                // Clear the result vector even when an assertion above fails.
                vv.clear();
            }
        } finally {
            // Close the context even if closing the batch itself throws.
            try {
                batch.close();
            } finally {
                batch.getContext().close();
            }
        }
    } finally {
        bit1.close();
    }
}
Also used : MockSubScanPOP(org.apache.drill.exec.store.mock.MockSubScanPOP) ValueVector(org.apache.drill.exec.vector.ValueVector) Drillbit(org.apache.drill.exec.server.Drillbit) MockTableDef(org.apache.drill.exec.store.mock.MockTableDef) RemoteServiceSet(org.apache.drill.exec.server.RemoteServiceSet) ScanBatch(org.apache.drill.exec.physical.impl.ScanBatch)

Example 45 with ValueVector

use of org.apache.drill.exec.vector.ValueVector in project drill by apache.

The dataArrived method of the class ParquetResultListener.

/**
 * Handles one incoming result batch: loads it, checks every vector against the expected field
 * description, and updates the running per-column value counters.
 *
 * <p>For each vector in the batch the method resumes the counter stored in {@code valuesChecked}
 * under the column's path (starting at 0 for a column seen for the first time), optionally
 * asserts every value when {@code testValues} is set, asserts that the vector's value count
 * equals the batch record count, and stores the updated counter back.
 *
 * <p>The batch loader is cleared and {@code result} is released in {@code finally} blocks, so a
 * failed assertion or load error does not leave the batch's buffers unreleased.
 *
 * @param result   the arrived batch; always released before this method returns
 * @param throttle not used by this method
 * @throws RuntimeException wrapping a {@code SchemaChangeException} raised while loading
 */
@Override
public synchronized void dataArrived(QueryDataBatch result, ConnectionThrottle throttle) {
    logger.debug("result arrived in test batch listener.");
    try {
        count += result.getHeader().getRowCount();
        final RecordBatchLoader batchLoader = new RecordBatchLoader(allocator);
        try {
            try {
                // The schema-changed flag returned by load(...) is not needed here.
                batchLoader.load(result.getHeader().getDef(), result.getData());
            // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
            // SchemaChangeException, so check/clean catch clause below.
            } catch (SchemaChangeException e) {
                throw new RuntimeException(e);
            }
            // used to make sure each vector in the batch has the same number of records
            final int valueCount = batchLoader.getRecordCount();
            for (final VectorWrapper<?> vw : batchLoader) {
                final ValueVector vv = vw.getValueVector();
                final String columnPath = vv.getField().getPath();
                final FieldInfo currentField = props.fields.get(columnPath);
                // Resume the running counter for this column, registering the column on first sight.
                int columnValCounter;
                if (!valuesChecked.containsKey(columnPath)) {
                    valuesChecked.put(columnPath, 0);
                    columnValCounter = 0;
                } else {
                    columnValCounter = valuesChecked.get(columnPath);
                }
                printColumnMajor(vv);
                if (testValues) {
                    for (int j = 0; j < vv.getAccessor().getValueCount(); j++) {
                        // the expected value is picked from currentField.values at index (counter % 3)
                        assertField(vv, j, currentField.type, currentField.values[columnValCounter % 3], currentField.name + "/");
                        columnValCounter++;
                    }
                } else {
                    columnValCounter += vv.getAccessor().getValueCount();
                }
                valuesChecked.remove(columnPath);
                assertEquals("Mismatched value count for vectors in the same batch.", valueCount, vv.getAccessor().getValueCount());
                valuesChecked.put(columnPath, columnValCounter);
            }
            if (ParquetRecordReaderTest.VERBOSE_DEBUG) {
                printRowMajor(batchLoader);
            }
            batchCounter++;
        } finally {
            // Clear the loaded vectors even when an assertion above fails.
            batchLoader.clear();
        }
    } finally {
        result.release();
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) VectorWrapper(org.apache.drill.exec.record.VectorWrapper)

Aggregations

ValueVector (org.apache.drill.exec.vector.ValueVector)130 MaterializedField (org.apache.drill.exec.record.MaterializedField)29 Test (org.junit.Test)21 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)18 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)13 VectorWrapper (org.apache.drill.exec.record.VectorWrapper)13 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)12 ExecTest (org.apache.drill.exec.ExecTest)11 IOException (java.io.IOException)10 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)10 VectorContainer (org.apache.drill.exec.record.VectorContainer)10 Drillbit (org.apache.drill.exec.server.Drillbit)10 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)9 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)9 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)9 SchemaPath (org.apache.drill.common.expression.SchemaPath)8 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)8 FunctionImplementationRegistry (org.apache.drill.exec.expr.fn.FunctionImplementationRegistry)8 FragmentContext (org.apache.drill.exec.ops.FragmentContext)8 PhysicalPlan (org.apache.drill.exec.physical.PhysicalPlan)8