Search in sources :

Example 1 with BatchReference

use of org.apache.drill.exec.expr.BatchReference in project drill by axbaretto.

the class NestedLoopJoinBatch method setupWorker.

/**
 * Builds the runtime-generated worker used by the nested loop join (NLJ).
 *
 * <p>Besides the setup method that wires the input and output value vector references,
 * code is generated for three methods:
 * <ol>
 *   <li>doEval() - evaluates whether a record from the left side matches a record from the right side</li>
 *   <li>emitLeft() - projects a record from the left side</li>
 *   <li>emitRight() - projects a record from the right side (which is a hyper container)</li>
 * </ol>
 *
 * @return the runtime generated class that implements the NestedLoopJoin interface
 * @throws SchemaChangeException if materializing the join condition collected any errors
 * @throws IOException declared by this method; presumably raised while obtaining the generated class - confirm
 * @throws ClassTransformationException declared by this method; presumably raised while obtaining the generated class - confirm
 */
private NestedLoopJoin setupWorker() throws IOException, ClassTransformationException, SchemaChangeException {
    final CodeGenerator<NestedLoopJoin> codeGen = CodeGenerator.get(NestedLoopJoin.TEMPLATE_DEFINITION, context.getOptions());
    codeGen.plainJavaCapable(true);
    // To inspect the generated source while debugging, enable the following line.
    // codeGen.saveCodeForDebugging(true);
    final ClassGenerator<NestedLoopJoin> classGen = codeGen.getRoot();

    // ---- doEval ----
    /*
        The join condition may reference fields from both the left and the right batch, so
        materialization has to be told which input each field comes from.

        Non-equality joins fall into categories such as:
        1. Join on non-equality join predicates:
           select * from t1 inner join t2 on (t1.c1 between t2.c1 AND t2.c2) AND (...)
        2. Join with an OR predicate:
           select * from t1 inner join t2 on t1.c1 = t2.c1 OR t1.c2 = t2.c2
     */
    final BatchReference leftReference = new BatchReference("leftBatch", "leftIndex");
    final BatchReference rightReference = new BatchReference("rightContainer", "rightBatchIndex", "rightRecordIndexWithinBatch");
    final Map<VectorAccessible, BatchReference> batchRefs = ImmutableMap.<VectorAccessible, BatchReference>builder()
        .put(left, leftReference)
        .put(rightContainer, rightReference)
        .build();
    final ErrorCollector errors = new ErrorCollectorImpl();
    final LogicalExpression joinCondition = ExpressionTreeMaterializer.materialize(
        popConfig.getCondition(), batchRefs, errors, context.getFunctionRegistry(), false, false);
    if (errors.hasErrors()) {
        final String message = String.format("Failure while trying to materialize join condition. Errors:\n %s.", errors.toErrorString());
        throw new SchemaChangeException(message);
    }
    classGen.addExpr(new ReturnValueExpression(joinCondition), ClassGenerator.BlkCreateMode.FALSE);

    // ---- emitLeft ----
    classGen.setMappingSet(emitLeftMapping);
    final JExpression outPosition = JExpr.direct("outIndex");
    final JExpression leftPosition = JExpr.direct("leftIndex");
    int inputOrdinal = 0;
    // Keeps counting across both sides: right-side columns are placed after the left-side ones.
    int outputOrdinal = 0;
    if (leftSchema != null) {
        // Wire every left-batch column to the output column with the same ordinal.
        for (MaterializedField leftField : leftSchema) {
            final TypeProtos.MajorType columnType = leftField.getType();
            // Register the column with the output container before declaring vector references.
            container.addOrGet(leftField);
            final JVar source = classGen.declareVectorValueSetupAndMember("leftBatch", new TypedFieldId(columnType, false, inputOrdinal++));
            final JVar target = classGen.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(columnType, false, outputOrdinal++));
            classGen.getEvalBlock().add(target.invoke("copyFromSafe").arg(leftPosition).arg(outPosition).arg(source));
            classGen.rotateBlock();
        }
    }

    // ---- emitRight ----
    inputOrdinal = 0;
    classGen.setMappingSet(emitRightMapping);
    final JExpression batchPosition = JExpr.direct("batchIndex");
    final JExpression recordPosition = JExpr.direct("recordIndexWithinBatch");
    if (rightSchema != null) {
        // Wire every right-container column to the next free output column.
        for (MaterializedField rightField : rightSchema) {
            final TypeProtos.MajorType inputType = rightField.getType();
            // For a LEFT join, REQUIRED right-side columns are emitted as OPTIONAL.
            final boolean relaxToOptional = popConfig.getJoinType() == JoinRelType.LEFT
                && inputType.getMode() == TypeProtos.DataMode.REQUIRED;
            final TypeProtos.MajorType outputType = relaxToOptional
                ? Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL)
                : inputType;
            container.addOrGet(MaterializedField.create(rightField.getName(), outputType));
            // The right input is declared as a hyper field, so a batch is selected with batchIndex first.
            final JVar source = classGen.declareVectorValueSetupAndMember("rightContainer", new TypedFieldId(inputType, true, inputOrdinal++));
            final JVar target = classGen.declareVectorValueSetupAndMember("outgoing", new TypedFieldId(outputType, false, outputOrdinal++));
            classGen.getEvalBlock().add(target.invoke("copyFromSafe").arg(recordPosition).arg(outPosition).arg(source.component(batchPosition)));
            classGen.rotateBlock();
        }
    }
    return context.getImplementationClass(codeGen);
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) MaterializedField(org.apache.drill.exec.record.MaterializedField) JExpression(com.sun.codemodel.JExpression) TypeProtos(org.apache.drill.common.types.TypeProtos) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) ReturnValueExpression(org.apache.drill.exec.physical.impl.filter.ReturnValueExpression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchReference(org.apache.drill.exec.expr.BatchReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) JVar(com.sun.codemodel.JVar)

Example 2 with BatchReference

use of org.apache.drill.exec.expr.BatchReference in project drill by apache.

the class NestedLoopJoinBatch method setupWorker.

/**
 * Builds the runtime-generated worker used by the nested loop join (NLJ).
 *
 * <p>Besides the setup method that wires the input and output value vector references,
 * code is generated for three methods:
 * <ol>
 *   <li>doEval() - evaluates whether a record from the left side matches a record from the right side</li>
 *   <li>emitLeft() - projects a record from the left side</li>
 *   <li>emitRight() - projects a record from the right side (which is a hyper container)</li>
 * </ol>
 *
 * @return the runtime generated class that implements the NestedLoopJoin interface
 */
private NestedLoopJoin setupWorker() {
    final CodeGenerator<NestedLoopJoin> codeGen = CodeGenerator.get(SETUP_LEFT_MAPPING, NestedLoopJoin.TEMPLATE_DEFINITION, context.getOptions());
    codeGen.plainJavaCapable(true);
    // To inspect the generated source while debugging, enable the following line.
    // codeGen.saveCodeForDebugging(true);
    final ClassGenerator<NestedLoopJoin> classGen = codeGen.getRoot();

    // ---- doEval ----
    /*
        The join condition may reference fields from both the left and the right batch, so
        materialization has to be told which input each field comes from.

        Non-equality joins fall into categories such as:
        1. Join on non-equality join predicates:
           select * from t1 inner join t2 on (t1.c1 between t2.c1 AND t2.c2) AND (...)
        2. Join with an OR predicate:
           select * from t1 inner join t2 on t1.c1 = t2.c1 OR t1.c2 = t2.c2
     */
    final BatchReference leftReference = new BatchReference("leftBatch", "leftIndex");
    final BatchReference rightReference = new BatchReference("rightContainer", "rightBatchIndex", "rightRecordIndexWithinBatch");
    final Map<VectorAccessible, BatchReference> batchRefs = ImmutableMap.<VectorAccessible, BatchReference>builder()
        .put(left, leftReference)
        .put(rightContainer, rightReference)
        .build();
    final ErrorCollector errors = new ErrorCollectorImpl();
    final LogicalExpression joinCondition = ExpressionTreeMaterializer.materialize(
        popConfig.getCondition(), batchRefs, errors, context.getFunctionRegistry(), false, false);
    // Hand whatever the materializer collected to the collector's own reporting routine.
    errors.reportErrors(logger);
    classGen.addExpr(new ReturnValueExpression(joinCondition), ClassGenerator.BlkCreateMode.FALSE);

    // ---- emitLeft ----
    classGen.setMappingSet(emitLeftMapping);
    final JExpression outPosition = JExpr.direct("outIndex");
    final JExpression leftPosition = JExpr.direct("leftIndex");
    int inputOrdinal = 0;
    // Keeps counting across both sides: right-side columns are placed after the left-side ones.
    int outputOrdinal = 0;
    if (leftSchema != null) {
        // Wire every left-batch column to the output column with the same ordinal.
        for (MaterializedField leftField : leftSchema) {
            final TypeProtos.MajorType columnType = leftField.getType();
            // Register the column with the output container before declaring vector references.
            container.addOrGet(leftField);
            final TypedFieldId sourceId = new TypedFieldId.Builder()
                .finalType(columnType)
                .hyper(false)
                .addId(inputOrdinal++)
                .build();
            final JVar source = classGen.declareVectorValueSetupAndMember("leftBatch", sourceId);
            final TypedFieldId targetId = new TypedFieldId.Builder()
                .finalType(columnType)
                .hyper(false)
                .addId(outputOrdinal++)
                .build();
            final JVar target = classGen.declareVectorValueSetupAndMember("outgoing", targetId);
            classGen.getEvalBlock().add(target.invoke("copyFromSafe").arg(leftPosition).arg(outPosition).arg(source));
            classGen.rotateBlock();
        }
    }

    // ---- emitRight ----
    inputOrdinal = 0;
    classGen.setMappingSet(emitRightMapping);
    final JExpression batchPosition = JExpr.direct("batchIndex");
    final JExpression recordPosition = JExpr.direct("recordIndexWithinBatch");
    if (rightSchema != null) {
        // Wire every right-container column to the next free output column.
        for (MaterializedField rightField : rightSchema) {
            final TypeProtos.MajorType inputType = rightField.getType();
            // For a LEFT join, REQUIRED right-side columns are emitted as OPTIONAL.
            final boolean relaxToOptional = popConfig.getJoinType() == JoinRelType.LEFT
                && inputType.getMode() == TypeProtos.DataMode.REQUIRED;
            final TypeProtos.MajorType outputType = relaxToOptional
                ? Types.overrideMode(inputType, TypeProtos.DataMode.OPTIONAL)
                : inputType;
            container.addOrGet(MaterializedField.create(rightField.getName(), outputType));
            // The right input is declared as a hyper field, so a batch is selected with batchIndex first.
            final TypedFieldId sourceId = new TypedFieldId.Builder()
                .finalType(inputType)
                .hyper(true)
                .addId(inputOrdinal++)
                .build();
            final JVar source = classGen.declareVectorValueSetupAndMember("rightContainer", sourceId);
            final TypedFieldId targetId = new TypedFieldId.Builder()
                .finalType(outputType)
                .hyper(false)
                .addId(outputOrdinal++)
                .build();
            final JVar target = classGen.declareVectorValueSetupAndMember("outgoing", targetId);
            classGen.getEvalBlock().add(target.invoke("copyFromSafe").arg(recordPosition).arg(outPosition).arg(source.component(batchPosition)));
            classGen.rotateBlock();
        }
    }
    return context.getImplementationClass(codeGen);
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) MaterializedField(org.apache.drill.exec.record.MaterializedField) JExpression(com.sun.codemodel.JExpression) TypeProtos(org.apache.drill.common.types.TypeProtos) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) ReturnValueExpression(org.apache.drill.exec.physical.impl.filter.ReturnValueExpression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) BatchReference(org.apache.drill.exec.expr.BatchReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) JVar(com.sun.codemodel.JVar)

Aggregations

JExpression (com.sun.codemodel.JExpression)2 JVar (com.sun.codemodel.JVar)2 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)2 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)2 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)2 TypeProtos (org.apache.drill.common.types.TypeProtos)2 BatchReference (org.apache.drill.exec.expr.BatchReference)2 ReturnValueExpression (org.apache.drill.exec.physical.impl.filter.ReturnValueExpression)2 MaterializedField (org.apache.drill.exec.record.MaterializedField)2 TypedFieldId (org.apache.drill.exec.record.TypedFieldId)2 VectorAccessible (org.apache.drill.exec.record.VectorAccessible)2 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)1