Search in sources :

Example 31 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestOutputBatchSize method testHashJoinUpperLimit.

@Test
public void testHashJoinUpperLimit() throws Exception {
    // Exercises the upper limit of 65535 records per output batch.
    HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
    mockOpContext(hashJoin, initReservation, maxAllocation);
    numRows = 100000;

    // Left input: one JSON array with ids 0..numRows (numRows + 1 records), each shaped like
    // "a1" : 5,  "c1" : <id>
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append("{\"a1\": 5, \"c1\" : ").append(id).append("},");
    }
    // The last record is appended outside the loop so it carries no trailing comma.
    leftJson.append("{\"a1\": 5, \"c1\" : ").append(numRows).append("}").append("]");
    List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());

    // Right input: same layout as the left side, each record shaped like
    // "a2" : 6, "c2" : <id>
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append("{\"a2\": 6, \"c2\" : ").append(id).append("},");
    }
    rightJson.append("{\"a2\": 6, \"c2\" : ").append(numRows).append("}").append("]");
    List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

    // Expected output: one joined row per id, e.g.
    // "a1" : 5,  "c1" : 1, "a2":6,  "c2": 1
    // "a1" : 5,  "c1" : 2, "a2":6,  "c2": 2
    // Two batches are expected because each batch is limited to 65535 records.
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(hashJoin)
        .baselineColumns("a1", "c1", "a2", "c2")
        .expectedNumBatches(2)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // Register one baseline row for every id in 0..numRows inclusive.
    for (long id = 0; id <= numRows; id++) {
        opTestBuilder.baselineValues(5L, id, 6L, id);
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 32 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestOutputBatchSize method testHashJoinLowerLimit.

@Test
public void testHashJoinLowerLimit() throws Exception {
    // Exercises the lower limit: at least one batch must always be produced.
    HashJoinPOP hashJoin = new HashJoinPOP(null, null, Lists.newArrayList(joinCond("c1", "EQUALS", "c2")), JoinRelType.INNER);
    mockOpContext(hashJoin, initReservation, maxAllocation);
    numRows = 10;

    // Left input: one JSON array with ids 0..numRows (numRows + 1 records), each shaped like
    // "a1" : 5, "b1" : wideString, "c1" : <id>
    String leftRecordPrefix = "{\"a1\": 5, \"b1\" : \"" + wideString + "\",\"c1\" : ";
    StringBuilder leftJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        leftJson.append(leftRecordPrefix).append(id).append("},");
    }
    // The last record is appended outside the loop so it carries no trailing comma.
    leftJson.append(leftRecordPrefix).append(numRows).append("}").append("]");
    List<String> leftJsonBatches = Lists.newArrayList(leftJson.toString());

    // Right input: same layout as the left side, each record shaped like
    // "a2" : 6, "b2" : wideString, "c2" : <id>
    String rightRecordPrefix = "{\"a2\": 6, \"b2\" : \"" + wideString + "\",\"c2\" : ";
    StringBuilder rightJson = new StringBuilder("[");
    for (int id = 0; id < numRows; id++) {
        rightJson.append(rightRecordPrefix).append(id).append("},");
    }
    rightJson.append(rightRecordPrefix).append(numRows).append("}").append("]");
    List<String> rightJsonBatches = Lists.newArrayList(rightJson.toString());

    // Expected output: one joined row per id, e.g.
    // "a1" : 5, "b1" : wideString, "c1" : 1, "a2":6, "b2" : wideString, "c2": 1
    // "a1" : 5, "b1" : wideString, "c1" : 2, "a2":6, "b2" : wideString, "c2": 2

    // Use a very small output batch size so that only one row fits per batch.
    fragContext.getOptions().setLocalOption("drill.exec.memory.operator.output_batch_size", 128);

    // NOTE(review): numRows + 1 (11) baseline rows are registered below while 10 batches
    // are expected - confirm this count against the operator's batching behaviour.
    LegacyOperatorTestBuilder opTestBuilder = legacyOpTestBuilder()
        .physicalOperator(hashJoin)
        .baselineColumns("a1", "b1", "c1", "a2", "b2", "c2")
        .expectedNumBatches(10)
        .inputDataStreamsJson(Lists.newArrayList(leftJsonBatches, rightJsonBatches));

    // Register one baseline row for every id in 0..numRows inclusive.
    for (long id = 0; id <= numRows; id++) {
        opTestBuilder.baselineValues(5L, wideString, id, 6L, wideString, id);
    }
    opTestBuilder.go();
}
Also used : LegacyOperatorTestBuilder(org.apache.drill.test.LegacyOperatorTestBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 33 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestNullInputMiniPlan method testHashJoinLeftEmpty.

@Test
public void testHashJoinLeftEmpty() throws Exception {
    // Left side: a scan batch built from SINGLE_EMPTY_JSON.
    RecordBatch emptyLeft = createScanBatchFromJson(SINGLE_EMPTY_JSON);

    // Right side: a JSON scan over a single record, reading columns "a" and "b".
    List<String> rightJson = Lists.newArrayList("[{\"a\": 50, \"b\" : 10 }]");
    RecordBatch rightSide = new JsonScanBuilder()
        .jsonBatches(rightJson)
        .columnsToRead("a", "b")
        .build();

    // INNER hash join on a2 = a, fed by the two inputs above (left first, then right).
    RecordBatch hashJoinBatch = new PopBuilder()
        .physicalOperator(new HashJoinPOP(null, null, Lists.newArrayList(joinCond("a2", "EQUALS", "a")), JoinRelType.INNER, null))
        .addInput(emptyLeft)
        .addInput(rightSide)
        .build();

    // The expected schema has two nullable BIGINT columns, "a" and "b", with no selection vector.
    SchemaBuilder expectedColumns = new SchemaBuilder()
        .addNullable("a", TypeProtos.MinorType.BIGINT)
        .addNullable("b", TypeProtos.MinorType.BIGINT);
    BatchSchema expectedSchema = new BatchSchemaBuilder()
        .withSchemaBuilder(expectedColumns)
        .withSVMode(BatchSchema.SelectionVectorMode.NONE)
        .build();

    // The join must expose that schema and produce zero rows.
    new MiniPlanTestBuilder()
        .root(hashJoinBatch)
        .expectSchema(expectedSchema)
        .expectZeroRow(true)
        .go();
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) RecordBatch(org.apache.drill.exec.record.RecordBatch) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Example 34 with HashJoinPOP

use of org.apache.drill.exec.physical.config.HashJoinPOP in project drill by apache.

the class TestNullInputMiniPlan method testRightHashJoinEmptyBoth.

/**
 * Builds a RIGHT hash join on {@code a = b} and hands it to
 * {@code testTwoInputNullBatchHandling}.
 */
@Test
public void testRightHashJoinEmptyBoth() throws Exception {
    final PhysicalOperator rightHashJoin = new HashJoinPOP(
        null,
        null,
        Lists.newArrayList(joinCond("a", "EQUALS", "b")),
        JoinRelType.RIGHT,
        null);
    testTwoInputNullBatchHandling(rightHashJoin);
}
Also used : PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) HashJoinPOP(org.apache.drill.exec.physical.config.HashJoinPOP) Test(org.junit.Test)

Aggregations

HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP) 34; Test (org.junit.Test) 30; PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator) 11; RecordBatch (org.apache.drill.exec.record.RecordBatch) 10; BatchSchema (org.apache.drill.exec.record.BatchSchema) 8; OperatorTest (org.apache.drill.categories.OperatorTest) 6; LegacyOperatorTestBuilder (org.apache.drill.test.LegacyOperatorTestBuilder) 6; SlowTest (org.apache.drill.categories.SlowTest) 5; JoinCondition (org.apache.drill.common.logical.data.JoinCondition) 5; BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder) 4; SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder) 4; SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder) 4; ArrayList (java.util.ArrayList) 3; JoinRelType (org.apache.calcite.rel.core.JoinRelType) 3; RuntimeFilterDef (org.apache.drill.exec.work.filter.RuntimeFilterDef) 3; MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch) 2; BloomFilterDef (org.apache.drill.exec.work.filter.BloomFilterDef) 2; RowSet (org.apache.drill.exec.physical.rowSet.RowSet) 1; VectorContainer (org.apache.drill.exec.record.VectorContainer) 1