use of org.apache.drill.exec.physical.config.LateralJoinPOP in project drill by apache.
the class TestLateralJoinCorrectnessBatchProcessing method testLeftAndRightWithInitialMissingRows_LeftJoin_MultipleBatch.
/**
 * Left lateral join where the first right batch is empty and the matching right rows arrive in later batches.
 * Per the expected row set, the left row without a right match is emitted with null right-side columns while the
 * remaining left rows are paired with their right rows, and everything is returned in a single output batch.
 *
 * @throws Exception if setting up or running the lateral join fails
 */
@Test
public void testLeftAndRightWithInitialMissingRows_LeftJoin_MultipleBatch() throws Exception {
  // Left side: a single non-empty batch announced with OK_NEW_SCHEMA
  leftContainer.add(nonEmptyLeftRowSet.container());
  leftOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  final CloseableRecordBatch leftMockBatch = new MockRecordBatch(
    fixture.getFragmentContext(), operatorContext, leftContainer, leftOutcomes, leftContainer.get(0).getSchema());

  // Right side data: two non-empty batches that follow an initial empty one
  final RowSet.SingleRowSet secondRightRows = fixture.rowSetBuilder(rightSchema)
    .addRow(2, 22, 220, "item22")
    .addRow(3, 33, 330, "item33")
    .build();
  final RowSet.SingleRowSet thirdRightRows = fixture.rowSetBuilder(rightSchema)
    .addRow(4, 44, 440, "item44_1")
    .addRow(4, 44, 440, "item44_2")
    .build();

  // Expected join output: first left row has no match, so its right columns are null
  final RowSet.SingleRowSet expectedRows = fixture.rowSetBuilder(expectedSchemaLeftJoin)
    .addRow(1, 10, "item1", null, null, null)
    .addRow(2, 20, "item2", 22, 220, "item22")
    .addRow(3, 30, "item3", 33, 330, "item33")
    .addRow(4, 40, "item4", 44, 440, "item44_1")
    .addRow(4, 40, "item4", 44, 440, "item44_2")
    .build();

  // Right side batches and their outcomes: empty (OK_NEW_SCHEMA), non-empty (OK), non-empty (EMIT)
  rightContainer.add(emptyRightRowSet.container());
  rightContainer.add(secondRightRows.container());
  rightContainer.add(thirdRightRows.container());
  rightOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  rightOutcomes.add(RecordBatch.IterOutcome.OK);
  rightOutcomes.add(RecordBatch.IterOutcome.EMIT);
  final CloseableRecordBatch rightMockBatch = new MockRecordBatch(
    fixture.getFragmentContext(), operatorContext, rightContainer, rightOutcomes, rightContainer.get(0).getSchema());

  // LEFT lateral join with an empty excluded-column list
  final LateralJoinPOP leftJoinConfig = new LateralJoinPOP(
    null, null, JoinRelType.LEFT, DrillLateralJoinRelBase.IMPLICIT_COLUMN, Lists.newArrayList());
  final LateralJoinBatch ljBatch = new LateralJoinBatch(
    leftJoinConfig, fixture.getFragmentContext(), leftMockBatch, rightMockBatch);

  try {
    assertTrue(ljBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertTrue(ljBatch.next() == RecordBatch.IterOutcome.OK);

    // One row for the unmatched left row plus every row supplied by the two non-empty right batches
    final int actualRowCount = ljBatch.getRecordCount();
    final int expectedRowCount = 1 + secondRightRows.rowCount() + thirdRightRows.rowCount();
    assertTrue(actualRowCount == expectedRowCount);

    // Compare the produced rows with the expected ones
    final RowSet actualRows = DirectRowSet.fromContainer(ljBatch.getContainer());
    new RowSetComparison(expectedRows).verify(actualRows);

    assertTrue(ljBatch.next() == RecordBatch.IterOutcome.NONE);
  } finally {
    // Release everything this test created
    ljBatch.close();
    leftMockBatch.close();
    rightMockBatch.close();
    secondRightRows.clear();
    expectedRows.clear();
  }
}
use of org.apache.drill.exec.physical.config.LateralJoinPOP in project drill by apache.
the class TestLateralJoinCorrectness method testLeftLateralJoin_WithAndWithoutMatching_MultipleBatch.
/**
 * Left lateral join over a left batch with several rows, where the right side returns rows for some left rows and
 * empty batches for others. For a left row with matching right rows the output holds the joined rows; for a left
 * row whose right side is empty only the left row is produced.
 * <p>
 * The maximum output row count is forced to a very small value so that the result is spread over multiple output
 * batches; the test checks that the total number of rows across those batches is still correct.
 * <p>
 * TODO: Update the test case based on Batch Sizing project since then the variable might not be available.
 *
 * @throws Exception if setting up or running the lateral join fails
 */
@Test
public void testLeftLateralJoin_WithAndWithoutMatching_MultipleBatch() throws Exception {
  // Row sets that are specific to this test
  final RowSet.SingleRowSet leftRows = fixture.rowSetBuilder(leftSchema)
    .addRow(1, 10, "item10")
    .addRow(2, 20, "item20")
    .addRow(3, 30, "item30")
    .build();
  final RowSet.SingleRowSet trailingRightRows = fixture.rowSetBuilder(rightSchema)
    .addRow(3, 6, 60, "item61")
    .addRow(3, 7, 70, "item71")
    .addRow(3, 8, 80, "item81")
    .build();

  // Left side: one batch with three rows, announced with OK_NEW_SCHEMA
  leftContainer.add(leftRows.container());
  leftOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  final CloseableRecordBatch leftMockBatch = new MockRecordBatch(
    fixture.getFragmentContext(), operatorContext, leftContainer, leftOutcomes, leftContainer.get(0).getSchema());

  // Right side: empty, non-empty, empty, non-empty batches with their outcomes
  rightContainer.add(emptyRightRowSet.container());
  rightContainer.add(nonEmptyRightRowSet.container());
  rightContainer.add(emptyRightRowSet.container());
  rightContainer.add(trailingRightRows.container());
  rightOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  rightOutcomes.add(RecordBatch.IterOutcome.OK);
  rightOutcomes.add(RecordBatch.IterOutcome.OK);
  rightOutcomes.add(RecordBatch.IterOutcome.EMIT);
  final CloseableRecordBatch rightMockBatch = new MockRecordBatch(
    fixture.getFragmentContext(), operatorContext, rightContainer, rightOutcomes, rightContainer.get(0).getSchema());

  final LateralJoinPOP leftJoinConfig = new LateralJoinPOP(
    null, null, JoinRelType.LEFT, DrillLateralJoinRelBase.IMPLICIT_COLUMN, Lists.newArrayList());
  final LateralJoinBatch ljBatch = new LateralJoinBatch(
    leftJoinConfig, fixture.getFragmentContext(), leftMockBatch, rightMockBatch);

  // Cap the output at two rows per batch, bypassing the memory manager
  final int maxRowsPerOutputBatch = 2;
  ljBatch.setUseMemoryManager(false);
  ljBatch.setMaxOutputRowCount(maxRowsPerOutputBatch);

  try {
    // NOTE(review): presumably 3 (first left row) + 1 (second left row, no match) + 3 (third left row);
    // nonEmptyRightRowSet is defined outside this method - confirm.
    final int expectedTotalRows = 7;
    // Exactly this many OK batches are expected before NONE
    final int expectedDataBatches = 4;

    assertTrue(ljBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);

    int totalRows = 0;
    for (int batch = 0; batch < expectedDataBatches; batch++) {
      assertTrue(ljBatch.next() == RecordBatch.IterOutcome.OK);
      totalRows += ljBatch.getRecordCount();
    }

    assertTrue(totalRows == expectedTotalRows);
    assertTrue(ljBatch.next() == RecordBatch.IterOutcome.NONE);
  } finally {
    // Release the operators created by this test. The two locally built row sets are not cleared
    // explicitly here (NOTE(review): presumably released through the mock batches - confirm).
    ljBatch.close();
    leftMockBatch.close();
    rightMockBatch.close();
  }
}
use of org.apache.drill.exec.physical.config.LateralJoinPOP in project drill by apache.
the class TestUnnestWithLateralCorrectness method setUpBeforeClass.
/**
 * One-time class setup: creates the shared mock store config, the default lateral join config and the operator
 * context built from the mock store config.
 *
 * @throws Exception if any of the shared fixtures cannot be created
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  mockPopConfig = new MockStorePOP(null);

  // Default lateral join config: INNER join with an empty excluded-column list
  final JoinRelType defaultJoinType = JoinRelType.INNER;
  ljPopConfig = new LateralJoinPOP(
    null, null, defaultJoinType, DrillLateralJoinRelBase.IMPLICIT_COLUMN, Lists.newArrayList());

  operatorContext = fixture.newOperatorContext(mockPopConfig);
}
use of org.apache.drill.exec.physical.config.LateralJoinPOP in project drill by apache.
the class TestUnnestWithLateralCorrectness method testUnnestLimitBatchSize_WithExcludedCols.
/**
 * Runs the unnest batch-sizing scenario with {@code unnestColumn} passed to the lateral join config as an
 * excluded column.
 * <p>
 * The shared {@code ljPopConfig} is replaced for the duration of the scenario and restored in a {@code finally}
 * block, so a failing scenario cannot leave the modified config behind for tests that run afterwards.
 */
@Test
public void testUnnestLimitBatchSize_WithExcludedCols() {
  final LateralJoinPOP previousPop = ljPopConfig;
  final List<SchemaPath> excludedCols = new ArrayList<>();
  excludedCols.add(SchemaPath.getSimplePath("unnestColumn"));
  ljPopConfig = new LateralJoinPOP(null, null, JoinRelType.INNER, DrillLateralJoinRelBase.IMPLICIT_COLUMN, excludedCols);
  try {
    final int limitedOutputBatchSize = 127;
    final int inputBatchSize = limitedOutputBatchSize + 1;
    // Since we want 127 row count and because of nearest power of 2 adjustment output row count will be reduced to
    // 64. So we should configure batch size for (N+1) rows if we want to output N rows where N is not power of 2.
    //
    // Size of the lateral output batch, where N = output batch row count:
    //   (N+1) * (input row size without unnest field) + (N+1) * (size of single unnest column)
    //   = (N+1) * (size of row id) + (N+1) * (size of single array entry)
    //   = (N+1) * 4 + (N+1) * 4
    //   = (N+1) * 8 bytes
    // Configure the output batch size to be one more record than N so that the batch sizer can round down.
    final int limitedOutputBatchSizeBytes = 8 * (limitedOutputBatchSize + 1);
    testUnnestBatchSizing(inputBatchSize, limitedOutputBatchSize, limitedOutputBatchSizeBytes, true);
  } finally {
    // Always put the shared config back, even when the scenario above throws
    ljPopConfig = previousPop;
  }
}
use of org.apache.drill.exec.physical.config.LateralJoinPOP in project drill by apache.
the class Materializer method visitLateralJoin.
/**
 * Materializes a lateral join: registers the operator's allocation on the fragment node, visits the left and
 * right children, and returns a copy of the operator built from the visited children. The copy carries over the
 * original cost and operator id and is linked to the unnest operator read from the fragment node.
 *
 * @param op    lateral join operator to materialize
 * @param iNode fragment node used for allocation bookkeeping and for obtaining the unnest operator
 * @return the new lateral join operator built from the visited children
 * @throws ExecutionSetupException if visiting a child fails
 */
@Override
public PhysicalOperator visitLateralJoin(LateralJoinPOP op, IndexedFragmentNode iNode) throws ExecutionSetupException {
  iNode.addAllocation(op);

  // Visit left first, then right
  final PhysicalOperator visitedLeft = op.getLeft().accept(this, iNode);
  final PhysicalOperator visitedRight = op.getRight().accept(this, iNode);
  final List<PhysicalOperator> visitedChildren = Lists.newArrayList();
  visitedChildren.add(visitedLeft);
  visitedChildren.add(visitedRight);

  // Read the unnest only after both children have been visited
  final UnnestPOP unnestOfLateral = iNode.getUnnest();

  final PhysicalOperator rebuilt = op.getNewWithChildren(visitedChildren);
  rebuilt.setCost(op.getCost());
  rebuilt.setOperatorId(Short.MAX_VALUE & op.getOperatorId());

  final LateralJoinPOP rebuiltLateral = (LateralJoinPOP) rebuilt;
  rebuiltLateral.setUnnestForLateralJoin(unnestOfLateral);
  return rebuilt;
}
Aggregations