Search in sources :

Example 31 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class TestOutputBatchSize method testLeftNestedLoopJoin.

/**
 * Verifies output batch sizing of a LEFT nested loop join on {@code c1 = c2}.
 *
 * <p>Each input holds {@code numRows + 1} rows with ids {@code 0..numRows}. The operator's
 * output batch size option is set to half of the expected output size, and the test expects
 * 4 output batches of that size.
 */
@Test
public void testLeftNestedLoopJoin() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression functionCallExpr = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.LEFT, functionCallExpr);
    numRows = 4000 * 2;

    // Row fragments shared by the input and expected JSON; only the trailing id varies per row.
    final String leftRowHead = "{\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
    final String rightRowHead = "{\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";
    final String joinedRowTail = ", \"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append(leftRowHead).append(id).append("},");
    }
    leftJson.append(leftRowHead).append(numRows).append("}]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append(rightRowHead).append(id).append("},");
    }
    rightJson.append(rightRowHead).append(numRows).append("}]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Expected output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        expectedJson.append(leftRowHead).append(id).append(joinedRowTail).append(id).append("},");
    }
    expectedJson.append(leftRowHead).append(numRows).append(joinedRowTail).append(numRows).append("}]");
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedJson.toString());

    // Set the output batch size to 1/2 of the total expected size.
    // We will get approximately 4 batches.
    long totalSize = getExpectedSize(expectedJsonBatches);
    long outputBatchSize = totalSize / 2;
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", outputBatchSize);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(nestedLoopJoin)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(4) // verify number of batches
        .expectedBatchSize(outputBatchSize) // verify batch size
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id in 0..numRows (inclusive).
    for (long id = 0; id <= numRows; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 32 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class TestOutputBatchSize method testNestedLoopJoinMultipleOutputBatches.

/**
 * Verifies that an INNER nested loop join on {@code c1 = c2} splits its output into multiple
 * batches when the output batch size is limited.
 *
 * <p>Each input holds {@code numRows + 1} rows with ids {@code 0..numRows}. The operator's
 * output batch size option is set to half of the expected output size, and the test expects
 * 4 output batches of that size.
 */
@Test
public void testNestedLoopJoinMultipleOutputBatches() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression functionCallExpr = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, functionCallExpr);
    mockOpContext(nestedLoopJoin, initReservation, maxAllocation);
    numRows = 4000 * 2;

    // Row fragments shared by the input and expected JSON; only the trailing id varies per row.
    final String leftRowHead = "{\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
    final String rightRowHead = "{\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";
    final String joinedRowTail = ", \"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append(leftRowHead).append(id).append("},");
    }
    leftJson.append(leftRowHead).append(numRows).append("}]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append(rightRowHead).append(id).append("},");
    }
    rightJson.append(rightRowHead).append(numRows).append("}]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Expected output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder expectedJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        expectedJson.append(leftRowHead).append(id).append(joinedRowTail).append(id).append("},");
    }
    expectedJson.append(leftRowHead).append(numRows).append(joinedRowTail).append(numRows).append("}]");
    List<String> expectedJsonBatches = Lists.newArrayList();
    expectedJsonBatches.add(expectedJson.toString());

    // Set the output batch size to 1/2 of the total expected size.
    // We will get approximately 4 batches.
    long totalSize = getExpectedSize(expectedJsonBatches);
    long outputBatchSize = totalSize / 2;
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", outputBatchSize);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(nestedLoopJoin)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(4) // verify number of batches
        .expectedBatchSize(outputBatchSize) // verify batch size
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id in 0..numRows (inclusive).
    for (long id = 0; id <= numRows; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 33 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class TestOutputBatchSize method testNestedLoopJoinLowerLimit.

/**
 * Exercises the lower limit of the output batch size for an INNER nested loop join on
 * {@code c1 = c2}: the batch size option is set to a very small value (128) and the test
 * expects the operator to still produce output, split into 10 batches.
 *
 * <p>Each input holds {@code numRows + 1} rows with ids {@code 0..numRows}.
 */
@Test
public void testNestedLoopJoinLowerLimit() throws Exception {
    // Join condition: equal(c1, c2).
    LogicalExpression leftKey = new FieldReference("c1", ExpressionPosition.UNKNOWN);
    LogicalExpression rightKey = new FieldReference("c2", ExpressionPosition.UNKNOWN);
    LogicalExpression functionCallExpr = new FunctionCall("equal", ImmutableList.of(leftKey, rightKey), ExpressionPosition.UNKNOWN);
    NestedLoopJoinPOP nestedLoopJoin = new NestedLoopJoinPOP(null, null, JoinRelType.INNER, functionCallExpr);
    numRows = 10;

    // Row fragments for the input JSON; only the trailing id varies per row.
    final String leftRowHead = "{\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
    final String rightRowHead = "{\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";

    // Left input rows: "a1" : 5, "b1" : wideString, "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append(leftRowHead).append(id).append("},");
    }
    leftJson.append(leftRowHead).append(numRows).append("}]");
    List<String> leftJsonBatches = Lists.newArrayList();
    leftJsonBatches.add(leftJson.toString());

    // Right input rows: "a2" : 6, "b2" : wideString, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append(rightRowHead).append(id).append("},");
    }
    rightJson.append(rightRowHead).append(numRows).append("}]");
    List<String> rightJsonBatches = Lists.newArrayList();
    rightJsonBatches.add(rightJson.toString());

    // Output rows pair each left row with the right row carrying the same id:
    // "a1" : 5, "b1" : wideString, "c1" : <id>, "a2" : 6, "b2" : wideString, "c2" : <id>

    // Set a very low output batch size so we can do only one row per batch.
    // NOTE(review): 11 rows are fed per side but 10 batches are expected - confirm how the
    // operator arrives at 10.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(nestedLoopJoin)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(10) // verify number of batches
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // One baseline row per id in 0..numRows (inclusive).
    for (long id = 0; id <= numRows; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) FunctionCall(org.apache.drill.common.expression.FunctionCall) NestedLoopJoinPOP(org.apache.drill.exec.physical.config.NestedLoopJoinPOP) Test(org.junit.Test)

Example 34 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class DrillLogicalTestUtils method parseExprs.

/**
 * Builds a list of {@link NamedExpression}s from alternating expression / output-name strings.
 *
 * <p>Arguments are consumed in pairs: the element at an even index is parsed with
 * {@link LogicalExpressionParser#parse}, and the element that follows it becomes the name of
 * the output field the expression is bound to.
 *
 * @param expressionsAndOutputNames alternating expression strings and output field names;
 *        must contain an even number of elements
 * @return one {@link NamedExpression} per (expression, output name) pair, in argument order
 * @throws IllegalArgumentException if an odd number of arguments is supplied
 */
public static List<NamedExpression> parseExprs(String... expressionsAndOutputNames) {
    Preconditions.checkArgument(expressionsAndOutputNames.length % 2 == 0, "List of expressions and output field names is not complete, each expression must explicitly give an output name.");
    // Exactly one result per pair, so the final size is known up front.
    List<NamedExpression> ret = new ArrayList<>(expressionsAndOutputNames.length / 2);
    for (int i = 0; i < expressionsAndOutputNames.length; i += 2) {
        LogicalExpression expression = LogicalExpressionParser.parse(expressionsAndOutputNames[i]);
        FieldReference outputRef = new FieldReference(new SchemaPath(new PathSegment.NameSegment(expressionsAndOutputNames[i + 1])));
        ret.add(new NamedExpression(expression, outputRef));
    }
    return ret;
}
Also used : FieldReference(org.apache.drill.common.expression.FieldReference) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ArrayList(java.util.ArrayList)

Example 35 with FieldReference

use of org.apache.drill.common.expression.FieldReference in project drill by apache.

the class HashAggTemplate method setup.

/**
 * Initializes this hash aggregate template: validates the arguments, stores the collaborators
 * in fields, reads the hash-agg options, optionally lowers the allocator's memory limit,
 * creates the spill set and the base hash table, and finally calls {@code doSetup(incoming)}.
 *
 * @param hashAggrConfig operator config; supplies the aggregation phase and the group-by
 *        expressions, and is passed to the {@code SpillSet}
 * @param htConfig hash table configuration handed to the {@code ChainedHashTable}
 * @param context fragment context; source of the option values read here
 * @param oContext operator context; source of the stats and the allocator
 * @param incoming incoming record batch
 * @param outgoing outgoing hash aggregate batch
 * @param valueExprs aggregate value expressions; must not be null (only its length is used here)
 * @param valueFieldIds field ids for the aggregate values; must not be null and must have at
 *        least as many entries as {@code valueExprs}
 * @param cg class generator, stored for later use
 * @param groupByOutFieldIds output field ids of the group-by columns, retained for later use
 * @param outContainer output vector container, stored for later use
 * @param extraRowBytes starting value for the estimated row widths
 * @throws IllegalArgumentException if {@code valueExprs} or {@code valueFieldIds} is null,
 *         if there are fewer field ids than value expressions, or if the config has no
 *         group-by expressions
 */
@Override
public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context, OperatorContext oContext, RecordBatch incoming, HashAggBatch outgoing, LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, ClassGenerator<?> cg, TypedFieldId[] groupByOutFieldIds, VectorContainer outContainer, int extraRowBytes) {
    // Argument validation: both value arrays are mandatory and every value expression
    // needs a corresponding field id.
    if (valueExprs == null || valueFieldIds == null) {
        throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
        throw new IllegalArgumentException("Wrong number of workspace variables.");
    }
    // Keep references to the collaborators for use after setup.
    this.context = context;
    this.stats = oContext.getStats();
    this.allocator = oContext.getAllocator();
    // The updater is built on the allocator assigned just above.
    this.updater = new HashAggUpdater(allocator);
    this.oContext = oContext;
    this.incoming = incoming;
    this.outgoing = outgoing;
    this.cg = cg;
    this.outContainer = outContainer;
    this.useMemoryPrediction = context.getOptions().getOption(ExecConstants.HASHAGG_USE_MEMORY_PREDICTION_VALIDATOR);
    this.phase = hashAggrConfig.getAggPhase();
    // single phase can not spill
    canSpill = phase.hasTwo();
    // Typically for testing - force a spill after a partition has more than so many batches
    minBatchesPerPartition = context.getOptions().getOption(ExecConstants.HASHAGG_MIN_BATCHES_PER_PARTITION_VALIDATOR);
    // Set the memory limit
    long memoryLimit = allocator.getLimit();
    // Optional configured memory limit, typically used only for testing.
    long configLimit = context.getOptions().getOption(ExecConstants.HASHAGG_MAX_MEMORY_VALIDATOR);
    if (configLimit > 0) {
        // NOTE(review): the warning reports configLimit, but the limit actually applied below
        // is min(allocator limit, configLimit), so the logged value can differ from the
        // effective one when the allocator limit is already lower.
        logger.warn("Memory limit was changed to {}", configLimit);
        // The configured limit can only lower the allocator's limit, never raise it.
        memoryLimit = Math.min(memoryLimit, configLimit);
        // enforce at the allocator
        allocator.setLimit(memoryLimit);
    }
    // TODO:  This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().size() == 0) {
        throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by " + "expressions.");
    }
    htIdxHolder = new IndexPointer();
    // One MaterializedField per value field id.
    materializedValueFields = new MaterializedField[valueFieldIds.size()];
    if (valueFieldIds.size() > 0) {
        int i = 0;
        // A single placeholder reference supplies the name for every materialized field;
        // only the type differs per field (taken from each id's intermediate type).
        // NOTE(review): presumably all fields end up named "dummy" - confirm that is intended.
        FieldReference ref = new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
        for (TypedFieldId id : valueFieldIds) {
            materializedValueFields[i++] = MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
        }
    }
    spillSet = new SpillSet(context, hashAggrConfig);
    baseHashTable = new ChainedHashTable(htConfig, context, allocator, incoming, null, /* no incoming probe */
    outgoing);
    // retain these for delayedSetup, and to allow recreating hash tables (after a spill)
    this.groupByOutFieldIds = groupByOutFieldIds;
    numGroupByOutFields = groupByOutFieldIds.length;
    // Start calculating the row widths (with the extra columns; the rest would be done in updateEstMaxBatchSize())
    estRowWidth = extraRowBytes;
    estValuesRowWidth = extraRowBytes;
    try {
        doSetup(incoming);
    } catch (SchemaChangeException e) {
        // Rethrow using the exception produced by HashAggBatch's helper for schema changes.
        throw HashAggBatch.schemaChangeException(e, "Hash Aggregate", logger);
    }
}
Also used : SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) FieldReference(org.apache.drill.common.expression.FieldReference) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) IndexPointer(org.apache.drill.exec.physical.impl.common.IndexPointer) SpillSet(org.apache.drill.exec.physical.impl.spill.SpillSet) ChainedHashTable(org.apache.drill.exec.physical.impl.common.ChainedHashTable)

Aggregations

- FieldReference (org.apache.drill.common.expression.FieldReference): 84
- LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 38
- Test (org.junit.Test): 22
- NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 19
- FunctionCall (org.apache.drill.common.expression.FunctionCall): 16
- SchemaPath (org.apache.drill.common.expression.SchemaPath): 14
- Ordering (org.apache.drill.common.logical.data.Order.Ordering): 14
- ErrorCollector (org.apache.drill.common.expression.ErrorCollector): 13
- ArrayList (java.util.ArrayList): 12
- ValueExpressions (org.apache.drill.common.expression.ValueExpressions): 12
- MaterializedField (org.apache.drill.exec.record.MaterializedField): 12
- ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl): 10
- RelFieldCollation (org.apache.calcite.rel.RelFieldCollation): 9
- Order (org.apache.drill.common.logical.data.Order): 9
- ExecTest (org.apache.drill.exec.ExecTest): 9
- AggregateCall (org.apache.calcite.rel.core.AggregateCall): 8
- OperatorTest (org.apache.drill.categories.OperatorTest): 8
- SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 8
- TypedFieldId (org.apache.drill.exec.record.TypedFieldId): 7
- VectorTest (org.apache.drill.categories.VectorTest): 6