Example 56 with FieldReference

Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.

The class FlattenRecordBatch, method setupNewSchema:

@Override
protected boolean setupNewSchema() throws SchemaChangeException {
    this.allocationVectors = Lists.newArrayList();
    container.clear();
    final List<NamedExpression> exprs = getExpressionList();
    final ErrorCollector collector = new ErrorCollectorImpl();
    final List<TransferPair> transfers = Lists.newArrayList();
    final ClassGenerator<Flattener> cg = CodeGenerator.getRoot(Flattener.TEMPLATE_DEFINITION, context.getOptions());
    cg.getCodeGenerator().plainJavaCapable(true);
    final IntHashSet transferFieldIds = new IntHashSet();
    final NamedExpression flattenExpr = new NamedExpression(popConfig.getColumn(), new FieldReference(popConfig.getColumn()));
    final ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) ExpressionTreeMaterializer.materialize(flattenExpr.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
    final FieldReference fieldReference = flattenExpr.getRef();
    final TransferPair transferPair = getFlattenFieldTransferPair(fieldReference);
    if (transferPair != null) {
        final ValueVector flattenVector = transferPair.getTo();
        // if the expression list is empty and the flatten vector still carries the default late-bind type, replace the resulting ValueVector with an INT-typed ValueVector
        if (exprs.size() == 0 && flattenVector.getField().getType().equals(Types.LATE_BIND_TYPE)) {
            final MaterializedField outputField = MaterializedField.create(fieldReference.getAsNamePart().getName(), Types.OPTIONAL_INT);
            final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
            container.add(vector);
        } else {
            transfers.add(transferPair);
            container.add(flattenVector);
            transferFieldIds.add(vectorRead.getFieldId().getFieldIds()[0]);
        }
    }
    logger.debug("Added transfer for project expression.");
    ClassifierResult result = new ClassifierResult();
    for (NamedExpression namedExpression : exprs) {
        result.clear();
        String outputName = getRef(namedExpression).getRootSegment().getPath();
        if (result != null && result.outputNames != null && result.outputNames.size() > 0) {
            for (int j = 0; j < result.outputNames.size(); j++) {
                if (!result.outputNames.get(j).equals(EMPTY_STRING)) {
                    outputName = result.outputNames.get(j);
                    break;
                }
            }
        }
        final LogicalExpression expr = ExpressionTreeMaterializer.materialize(namedExpression.getExpr(), incoming, collector, context.getFunctionRegistry(), true);
        if (collector.hasErrors()) {
            throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
        }
        if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
            // Lazy initialization of the list of complex writers, if not done yet.
            if (complexWriters == null) {
                complexWriters = Lists.newArrayList();
            }
            // The reference name will be passed to ComplexWriter, used as the name of the output vector from the writer.
            ((DrillFuncHolderExpr) expr).getFieldReference(namedExpression.getRef());
            cg.addExpr(expr);
        } else {
            // need to do evaluation.
            final MaterializedField outputField;
            if (expr instanceof ValueVectorReadExpression) {
                final TypedFieldId id = ValueVectorReadExpression.class.cast(expr).getFieldId();
                @SuppressWarnings("resource") final ValueVector incomingVector = incoming.getValueAccessorById(id.getIntermediateClass(), id.getFieldIds()).getValueVector();
                // the incoming vector may be absent when the first batch is empty;
                // fall back to the expression's major type in that case.
                if (incomingVector != null) {
                    outputField = incomingVector.getField().clone();
                } else {
                    outputField = MaterializedField.create(outputName, expr.getMajorType());
                }
            } else {
                outputField = MaterializedField.create(outputName, expr.getMajorType());
            }
            @SuppressWarnings("resource") final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
            allocationVectors.add(vector);
            TypedFieldId fid = container.add(vector);
            ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
            cg.addExpr(write);
            logger.debug("Added eval for project expression.");
        }
    }
    cg.rotateBlock();
    cg.getEvalBlock()._return(JExpr.TRUE);
    container.buildSchema(SelectionVectorMode.NONE);
    try {
        this.flattener = context.getImplementationClass(cg.getCodeGenerator());
        flattener.setup(context, incoming, this, transfers);
    } catch (ClassTransformationException | IOException e) {
        throw new SchemaChangeException("Failure while attempting to load generated class", e);
    }
    return true;
}
Also used: TransferPair(org.apache.drill.exec.record.TransferPair) IntHashSet(com.carrotsearch.hppc.IntHashSet) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) DrillFuncHolderExpr(org.apache.drill.exec.expr.DrillFuncHolderExpr) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) ValueVectorWriteExpression(org.apache.drill.exec.expr.ValueVectorWriteExpression) FieldReference(org.apache.drill.common.expression.FieldReference) ClassTransformationException(org.apache.drill.exec.exception.ClassTransformationException) MaterializedField(org.apache.drill.exec.record.MaterializedField) IOException(java.io.IOException) ValueVectorReadExpression(org.apache.drill.exec.expr.ValueVectorReadExpression) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression)
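
The core of the pattern above is naming the flatten output after the flatten column itself. A minimal sketch of just that step, assuming a hypothetical column name "repeatedCol" (FieldReference accepts a SchemaPath directly, since it extends SchemaPath):

// hypothetical column to flatten; in the operator, popConfig.getColumn() supplies this path
SchemaPath column = SchemaPath.getSimplePath("repeatedCol");
// the output field reuses the input column's path as its name
NamedExpression flattenExpr = new NamedExpression(column, new FieldReference(column));
FieldReference fieldReference = flattenExpr.getRef(); // later drives the transfer pair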

Example 57 with FieldReference

Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.

The class HashJoinBatch, method setupHashTable:

public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
    // Setup the hash table configuration object
    int conditionsSize = conditions.size();
    final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
    List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);
    // Create named expressions from the conditions
    for (int i = 0; i < conditionsSize; i++) {
        rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
        leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
    }
    // Set the left named expression to be null if the probe batch is empty.
    if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
        leftExpr = null;
    } else {
        if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
            final String errorMsg = new StringBuilder().append("Hash join does not support probe batch with selection vectors. ").append("Probe batch has selection mode = ").append(left.getSchema().getSelectionVectorMode()).toString();
            throw new SchemaChangeException(errorMsg);
        }
    }
    final HashTableConfig htConfig = new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);
    // Create the chained hash table
    final ChainedHashTable ht = new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
    hashTable = ht.createAndSetupHashTable(null, 1);
}
Also used: SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) HashTableConfig(org.apache.drill.exec.physical.impl.common.HashTableConfig) FieldReference(org.apache.drill.common.expression.FieldReference) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ArrayList(java.util.ArrayList) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable)
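
For generated columns like these join keys, the plain string constructor is enough; no SchemaPath needs to exist up front. A minimal sketch for a single hypothetical join condition (the names "b_key" and "p_key" are placeholders):

// hypothetical key expressions for one join condition
LogicalExpression buildKey = SchemaPath.getSimplePath("b_key");
LogicalExpression probeKey = SchemaPath.getSimplePath("p_key");
NamedExpression rightKey = new NamedExpression(buildKey, new FieldReference("build_side_0"));
NamedExpression leftKey = new NamedExpression(probeKey, new FieldReference("probe_side_0"));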

Example 58 with FieldReference

Use of org.apache.drill.common.expression.FieldReference in project drill by axbaretto.

The class HashAggTemplate, method setup:

@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context, OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, TypedFieldId[] groupByOutFieldIds, VectorContainer outContainer, int extraRowBytes) throws SchemaChangeException, IOException {
    if (valueExprs == null || valueFieldIds == null) {
        throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
        throw new IllegalArgumentException("Wrong number of workspace variables.");
    }
    this.context = context;
    this.stats = oContext.getStats();
    this.allocator = oContext.getAllocator();
    this.oContext = oContext;
    this.incoming = incoming;
    this.outgoing = outgoing;
    this.outContainer = outContainer;
    this.operatorId = hashAggrConfig.getOperatorId();
    this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
    is2ndPhase = hashAggrConfig.getAggPhase() == AggPrelBase.OperatorPhase.PHASE_2of2;
    isTwoPhase = hashAggrConfig.getAggPhase() != AggPrelBase.OperatorPhase.PHASE_1of1;
    is1stPhase = isTwoPhase && !is2ndPhase;
    // single phase can not spill
    canSpill = isTwoPhase;
    // Typically for testing - force a spill after a partition has more than so many batches
    minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
    // Set the memory limit
    long memoryLimit = allocator.getLimit();
    // Optional configured memory limit, typically used only for testing.
    long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
    if (configLimit > 0) {
        logger.warn("Memory limit was changed to {}", configLimit);
        memoryLimit = Math.min(memoryLimit, configLimit);
        // enforce at the allocator
        allocator.setLimit(memoryLimit);
    }
    // TODO:  This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().size() == 0) {
        throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by " + "expressions.");
    }
    this.htIdxHolder = new IndexPointer();
    this.outStartIdxHolder = new IndexPointer();
    this.outNumRecordsHolder = new IndexPointer();
    materializedValueFields = new MaterializedField[valueFieldIds.size()];
    if (valueFieldIds.size() > 0) {
        int i = 0;
        FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
        for (TypedFieldId id : valueFieldIds) {
            materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
        }
    }
    spillSet = new SpillSet(context, hashAggrConfig);
    baseHashTable = new ChainedHashTable(htConfig, context, allocator, incoming, null, /* no incoming probe */ outgoing);
    // retain these for delayedSetup, and to allow recreating hash tables (after a spill)
    this.groupByOutFieldIds = groupByOutFieldIds;
    numGroupByOutFields = groupByOutFieldIds.length;
    // Start calculating the row widths (with the extra columns; the rest would be done in updateEstMaxBatchSize() )
    estRowWidth = extraRowBytes;
    estValuesRowWidth = extraRowBytes;
    doSetup(incoming);
}
Also used: FieldReference(org.apache.drill.common.expression.FieldReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) IndexPointer(org.apache.drill.exec.physical.impl.common.IndexPointer) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable)
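
The three-argument constructor seen above attaches an ExpressionPosition and a MajorType to the reference. A minimal sketch under the same assumptions, except that the type here is a hypothetical required INT rather than one taken from a TypedFieldId:

// "dummy" is only a placeholder name for the workspace value fields
TypeProtos.MajorType type = Types.required(TypeProtos.MinorType.INT); // hypothetical type
FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, type);
MaterializedField field = MaterializedField.create(ref.getAsNamePart().getName(), type);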

Example 59 with FieldReference

Use of org.apache.drill.common.expression.FieldReference in project drill by apache.

The class TopNBatchTest, method priorityQueueOrderingTest:

/**
 * Priority queue unit test.
 * @throws Exception
 */
@Test
public void priorityQueueOrderingTest() throws Exception {
    Properties properties = new Properties();
    DrillConfig drillConfig = DrillConfig.create(properties);
    DrillbitContext drillbitContext = mockDrillbitContext();
    when(drillbitContext.getFunctionImplementationRegistry()).thenReturn(new FunctionImplementationRegistry(drillConfig));
    FieldReference expr = FieldReference.getWithQuotedRef("colA");
    Order.Ordering ordering = new Order.Ordering(Order.Ordering.ORDER_DESC, expr, Order.Ordering.NULLS_FIRST);
    List<Order.Ordering> orderings = Lists.newArrayList(ordering);
    MaterializedField colA = MaterializedField.create("colA", Types.required(TypeProtos.MinorType.INT));
    MaterializedField colB = MaterializedField.create("colB", Types.required(TypeProtos.MinorType.INT));
    List<MaterializedField> cols = Lists.newArrayList(colA, colB);
    BatchSchema batchSchema = new BatchSchema(BatchSchema.SelectionVectorMode.NONE, cols);
    FragmentContextImpl context = new FragmentContextImpl(drillbitContext, BitControl.PlanFragment.getDefaultInstance(), null, drillbitContext.getFunctionImplementationRegistry());
    RowSet expectedRowSet;
    try (RootAllocator allocator = new RootAllocator(100_000_000)) {
        expectedRowSet = new RowSetBuilder(allocator, batchSchema).addRow(110, 10).addRow(109, 9).addRow(108, 8).addRow(107, 7).addRow(106, 6).addRow(105, 5).addRow(104, 4).addRow(103, 3).addRow(102, 2).addRow(101, 1).build();
        PriorityQueue queue;
        ExpandableHyperContainer hyperContainer;
        {
            VectorContainer container = new RowSetBuilder(allocator, batchSchema).build().container();
            hyperContainer = new ExpandableHyperContainer(container);
            queue = TopNBatch.createNewPriorityQueue(TopNBatch.createMainMappingSet(), TopNBatch.createLeftMappingSet(), TopNBatch.createRightMappingSet(), orderings, hyperContainer, false, true, 10, allocator, batchSchema.getSelectionVectorMode(), context);
        }
        List<RecordBatchData> testBatches = Lists.newArrayList();
        try {
            final Random random = new Random();
            final int bound = 100;
            final int numBatches = 11;
            final int numRecordsPerBatch = 100;
            for (int batchCounter = 0; batchCounter < numBatches; batchCounter++) {
                RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, batchSchema);
                rowSetBuilder.addRow((batchCounter + bound), batchCounter);
                for (int recordCounter = 0; recordCounter < numRecordsPerBatch; recordCounter++) {
                    rowSetBuilder.addRow(random.nextInt(bound), random.nextInt(bound));
                }
                VectorContainer vectorContainer = rowSetBuilder.build().container();
                queue.add(new RecordBatchData(vectorContainer, allocator));
            }
            queue.generate();
            VectorContainer resultContainer = queue.getHyperBatch();
            resultContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
            RowSet.HyperRowSet actualHyperSet = HyperRowSetImpl.fromContainer(resultContainer, queue.getFinalSv4());
            new RowSetComparison(expectedRowSet).verify(actualHyperSet);
        } finally {
            if (expectedRowSet != null) {
                expectedRowSet.clear();
            }
            queue.cleanup();
            hyperContainer.clear();
            for (RecordBatchData testBatch : testBatches) {
                testBatch.clear();
            }
        }
    }
}
Also used: DrillbitContext(org.apache.drill.exec.server.DrillbitContext) RecordBatchData(org.apache.drill.exec.physical.impl.sort.RecordBatchData) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) FragmentContextImpl(org.apache.drill.exec.ops.FragmentContextImpl) Properties(java.util.Properties) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) RootAllocator(org.apache.drill.exec.memory.RootAllocator) DrillConfig(org.apache.drill.common.config.DrillConfig) Random(java.util.Random) FunctionImplementationRegistry(org.apache.drill.exec.expr.fn.FunctionImplementationRegistry) Order(org.apache.drill.common.logical.data.Order) ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) FieldReference(org.apache.drill.common.expression.FieldReference) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer) BatchSchema(org.apache.drill.exec.record.BatchSchema) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)
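
Instead of a constructor, this test uses the quoting factory method, which treats the whole string as a single, already-quoted name segment; useful when a column name could otherwise be parsed as a nested path. A minimal sketch with a hypothetical dotted column name:

// the dots stay part of the name instead of starting a nested path
FieldReference expr = FieldReference.getWithQuotedRef("col.with.dots"); // hypothetical name
Order.Ordering ordering = new Order.Ordering(Order.Ordering.ORDER_DESC, expr, Order.Ordering.NULLS_FIRST);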

Example 60 with FieldReference

Use of org.apache.drill.common.expression.FieldReference in project drill by apache.

The class TestHashAggBatch, method createHashAggPhysicalOperator:

private HashAggregate createHashAggPhysicalOperator(AggPrelBase.OperatorPhase phase) {
    final List<NamedExpression> keyExpressions = Lists.newArrayList(
        new NamedExpression(SchemaPath.getSimplePath(FIRST_NAME_COL), new FieldReference(FIRST_NAME_COL)),
        new NamedExpression(SchemaPath.getSimplePath(LAST_NAME_COL), new FieldReference(LAST_NAME_COL)));
    final List<NamedExpression> aggExpressions = Lists.newArrayList(
        new NamedExpression(
            new FunctionCall("sum", ImmutableList.of(SchemaPath.getSimplePath(STUFF_COL)), new ExpressionPosition(null, 0)),
            new FieldReference(TOTAL_STUFF_COL)));
    return new HashAggregate(null, phase, keyExpressions, aggExpressions, 0.0f);
}
Also used: FieldReference(org.apache.drill.common.expression.FieldReference) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) FunctionCall(org.apache.drill.common.expression.FunctionCall) ExpressionPosition(org.apache.drill.common.expression.ExpressionPosition) HashAggregate(org.apache.drill.exec.physical.config.HashAggregate)
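
The same string constructor also names computed columns, here the output of the sum aggregate. A minimal standalone sketch with hypothetical column names ("stuff", "total_stuff"):

// build "sum(stuff)" and expose it under the output name "total_stuff"
LogicalExpression sumCall = new FunctionCall("sum",
    ImmutableList.of(SchemaPath.getSimplePath("stuff")), new ExpressionPosition(null, 0));
NamedExpression agg = new NamedExpression(sumCall, new FieldReference("total_stuff"));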

Aggregations

FieldReference (org.apache.drill.common.expression.FieldReference): 84 usages
LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 38 usages
Test (org.junit.Test): 22 usages
NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 19 usages
FunctionCall (org.apache.drill.common.expression.FunctionCall): 16 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 14 usages
Ordering (org.apache.drill.common.logical.data.Order.Ordering): 14 usages
ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 13 usages
ArrayList (java.util.ArrayList): 12 usages
ValueExpressions (org.apache.drill.common.expression.ValueExpressions): 12 usages
MaterializedField (org.apache.drill.exec.record.MaterializedField): 12 usages
ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 10 usages
RelFieldCollation (org.apache.calcite.rel.RelFieldCollation): 9 usages
Order (org.apache.drill.common.logical.data.Order): 9 usages
ExecTest (org.apache.drill.exec.ExecTest): 9 usages
AggregateCall (org.apache.calcite.rel.core.AggregateCall): 8 usages
OperatorTest (org.apache.drill.categories.OperatorTest): 8 usages
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8 usages
TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 7 usages
VectorTest (org.apache.drill.categories.VectorTest): 6 usages