Use of org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch in project drill by apache.
The class TestStreamingAggEmitOutcome, method t18_testStreamingAggrMultipleInputToSingleOutputBatch.
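These snippets rely on fixtures initialized in the test class's setup and not shown here: inputSchema, resultSchemaNoGroupBy, emptyInputRowSet, nonEmptyInputRowSet, inputContainer, inputOutcomes, and opContext. The following is only a minimal sketch of what those fixtures presumably look like, inferred from the column names and expected values used in the snippets below; the exact definitions live in the Drill test base class and may differ.

// Sketch only: assumed fixture setup, inferred from the snippets below, not copied from the Drill source.
TupleMetadata inputSchema = new SchemaBuilder()
    .add("id_left", TypeProtos.MinorType.INT)
    .add("cost_left", TypeProtos.MinorType.INT)
    .add("name_left", TypeProtos.MinorType.VARCHAR)
    .buildSchema();

// sum(INT) widens to a nullable BIGINT, hence a single nullable "total_sum" column for the no-group-by tests.
TupleMetadata resultSchemaNoGroupBy = new SchemaBuilder()
    .addNullable("total_sum", TypeProtos.MinorType.BIGINT)
    .buildSchema();

// One empty and one single-row input batch reused by most tests; the row (1, 10, "item1") is an assumption
// that is consistent with the expected sums and counts asserted below.
RowSet.SingleRowSet emptyInputRowSet = operatorFixture.rowSetBuilder(inputSchema).build();
RowSet.SingleRowSet nonEmptyInputRowSet = operatorFixture.rowSetBuilder(inputSchema)
    .addRow(1, 10, "item1")
    .build();

// Lists drained by MockRecordBatch: one container and one IterOutcome per simulated next() call.
List<VectorContainer> inputContainer = new ArrayList<>();
List<RecordBatch.IterOutcome> inputOutcomes = new ArrayList<>();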
/**
* Repeats t8_testStreamingAggrMultipleInputToSingleOutputBatch with no group by
*/
@Test
public void t18_testStreamingAggrMultipleInputToSingleOutputBatch() {
  final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 20, "item2")
      .build();
  final RowSet.SingleRowSet expectedRowSet = operatorFixture.rowSetBuilder(resultSchemaNoGroupBy)
      .addRow((long) 33)
      .build();

  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet.container());
  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet2.container());
  inputContainer.add(emptyInputRowSet.container());

  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);

  final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
  final StreamingAggregate streamAggrConfig = new StreamingAggregate(null, new ArrayList<>(),
      parseExprs("sum(id_left+cost_left)", "total_sum"));
  final StreamingAggBatch strAggBatch = new StreamingAggBatch(streamAggrConfig, mockInputBatch,
      operatorFixture.getFragmentContext());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(1, strAggBatch.getRecordCount());
  RowSet actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(0, strAggBatch.getRecordCount());
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.NONE);

  nonEmptyInputRowSet2.clear();
}
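Worked total for this test, assuming the shared nonEmptyInputRowSet holds the single row (1, 10, "item1") as sketched above: sum(id_left + cost_left) = (1 + 10) + (2 + 20) = 33, which is the value in expectedRowSet. Only the last input batch carries EMIT, so all five batches feed one aggregation window and the whole sum arrives in a single output batch.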
Use of org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch in project drill by apache.
The class TestStreamingAggEmitOutcome, method t8_1_testStreamingAggr_InputSplitToMultipleOutputBatch.
/**
 * Verifies the scenario where multiple incoming batches (OK_NEW_SCHEMA, OK, OK, EMIT) produce output that is split
 * across multiple output batches: the first output batch is returned with OK_NEW_SCHEMA and the remaining rows
 * follow with the EMIT outcome.
 */
@Test
public void t8_1_testStreamingAggr_InputSplitToMultipleOutputBatch() {
  final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(1, 20, "item1")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet3 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 30, "item2")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet4 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 40, "item2")
      .addRow(2, 50, "item2")
      .addRow(2, 60, "item2")
      .addRow(2, 70, "item2")
      .addRow(3, 100, "item3")
      .addRow(3, 200, "item3")
      .addRow(3, 300, "item3")
      .addRow(3, 400, "item3")
      .build();

  TupleMetadata resultSchema2 = new SchemaBuilder()
      .add("name", TypeProtos.MinorType.VARCHAR)
      .add("id", TypeProtos.MinorType.INT)
      .add("total_count", TypeProtos.MinorType.BIGINT)
      .buildSchema();
  final RowSet.SingleRowSet expectedRowSet1 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item1", 1, (long) 2)
      .addRow("item2", 2, (long) 5)
      .build();
  final RowSet.SingleRowSet expectedRowSet2 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item3", 3, (long) 4)
      .build();

  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet2.container());
  inputContainer.add(nonEmptyInputRowSet3.container());
  inputContainer.add(nonEmptyInputRowSet4.container());

  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);

  final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
  final StreamingAggregate streamAggrConfig = new StreamingAggregate(null,
      parseExprs("name_left", "name", "id_left", "id"),
      parseExprs("count(cost_left)", "total_count"));
  final StreamingAggBatch strAggBatch = new StreamingAggBatch(streamAggrConfig, mockInputBatch,
      operatorFixture.getFragmentContext());
  strAggBatch.setMaxOutputRowCount(2);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);

  // From the second input batch onwards the aggregated output is returned with OK_NEW_SCHEMA first, since the
  // output batch is full after producing 2 groups
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(2, strAggBatch.getRecordCount());
  RowSet actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet1).verify(actualRowSet);

  // The last group is produced in a separate output batch with the EMIT outcome
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet2).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.NONE);

  nonEmptyInputRowSet2.clear();
  nonEmptyInputRowSet3.clear();
  nonEmptyInputRowSet4.clear();
  expectedRowSet1.clear();
  expectedRowSet2.clear();
}
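Worked group counts for t8_1, again assuming nonEmptyInputRowSet is the single row (1, 10, "item1"): item1 appears in 2 input rows, item2 in 1 + 4 = 5 rows, and item3 in 4 rows. With setMaxOutputRowCount(2) the three groups cannot fit in one output batch, so the operator returns (item1, item2) first with OK_NEW_SCHEMA and the trailing (item3) group with EMIT, which is exactly what the assertions check.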
Use of org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch in project drill by apache.
The class TestStreamingAggEmitOutcome, method t8_2_testStreamingAggr_Inputs_OK_EMIT_SplitToMultipleOutputBatch.
/**
 * Verifies the scenario where a first run of incoming batches (OK_NEW_SCHEMA, OK, OK, EMIT) and a second run
 * (OK, OK, EMIT) each produce output that is split across multiple output batches, and that both runs are
 * handled correctly.
 */
@Test
public void t8_2_testStreamingAggr_Inputs_OK_EMIT_SplitToMultipleOutputBatch() {
  final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(1, 20, "item1")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet3 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 30, "item2")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet4 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 40, "item2")
      .addRow(2, 50, "item2")
      .addRow(2, 60, "item2")
      .addRow(2, 70, "item2")
      .addRow(3, 100, "item3")
      .addRow(3, 200, "item3")
      .addRow(3, 300, "item3")
      .addRow(3, 400, "item3")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet5 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 40, "item2")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet6 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 50, "item2")
      .build();
  final RowSet.SingleRowSet nonEmptyInputRowSet7 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(3, 130, "item3")
      .addRow(3, 130, "item3")
      .addRow(4, 140, "item4")
      .addRow(4, 140, "item4")
      .build();

  TupleMetadata resultSchema2 = new SchemaBuilder()
      .add("name", TypeProtos.MinorType.VARCHAR)
      .add("id", TypeProtos.MinorType.INT)
      .add("total_count", TypeProtos.MinorType.BIGINT)
      .buildSchema();
  final RowSet.SingleRowSet expectedRowSet1 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item1", 1, (long) 2)
      .addRow("item2", 2, (long) 5)
      .build();
  final RowSet.SingleRowSet expectedRowSet2 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item3", 3, (long) 4)
      .build();
  final RowSet.SingleRowSet expectedRowSet3 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item2", 2, (long) 2)
      .addRow("item3", 3, (long) 2)
      .build();
  final RowSet.SingleRowSet expectedRowSet4 = operatorFixture.rowSetBuilder(resultSchema2)
      .addRow("item4", 4, (long) 2)
      .build();

  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet2.container());
  inputContainer.add(nonEmptyInputRowSet3.container());
  inputContainer.add(nonEmptyInputRowSet4.container());
  inputContainer.add(nonEmptyInputRowSet5.container());
  inputContainer.add(nonEmptyInputRowSet6.container());
  inputContainer.add(nonEmptyInputRowSet7.container());

  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);

  final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
  final StreamingAggregate streamAggrConfig = new StreamingAggregate(null,
      parseExprs("name_left", "name", "id_left", "id"),
      parseExprs("count(cost_left)", "total_count"));
  final StreamingAggBatch strAggBatch = new StreamingAggBatch(streamAggrConfig, mockInputBatch,
      operatorFixture.getFragmentContext());
  strAggBatch.setMaxOutputRowCount(2);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);

  // Output batches for input batches 2 to 5
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(2, strAggBatch.getRecordCount());
  RowSet actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet1).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet2).verify(actualRowSet);

  // Output batches for input batches 6 to 8
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK);
  // The output batch is full after producing 2 rows
  assertEquals(2, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet3).verify(actualRowSet);

  // Output batch with the remaining pending rows
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet4).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.NONE);

  nonEmptyInputRowSet2.clear();
  nonEmptyInputRowSet3.clear();
  nonEmptyInputRowSet4.clear();
  nonEmptyInputRowSet5.clear();
  nonEmptyInputRowSet6.clear();
  nonEmptyInputRowSet7.clear();
  expectedRowSet1.clear();
  expectedRowSet2.clear();
  expectedRowSet3.clear();
  expectedRowSet4.clear();
}
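The second EMIT window (input batches 6 to 8) contains 2 rows for item2, 2 for item3, and 2 for item4, so it also overflows the 2-row output limit: (item2, item3) come back with OK and the remaining (item4) group with EMIT. Note that item2 and item3 are counted again from zero here; the EMIT after batch 5 closed the previous window and reset the aggregation state.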
Use of org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch in project drill by apache.
The class TestStreamingAggEmitOutcome, method t22_testStreamingAggrRunsOfEmpty_NonEmpty.
/**
 * Repeats the group-by variant of testStreamingAggrRunsOfEmpty_NonEmpty with no group by
 */
@Test
public void t22_testStreamingAggrRunsOfEmpty_NonEmpty() {
  final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 20, "item2")
      .build();

  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet2.container());

  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);

  // With no group by, an aggregate over an empty EMIT window still produces one row whose SUM is null
  final RowSet.SingleRowSet expectedRowSet = operatorFixture.rowSetBuilder(resultSchemaNoGroupBy)
      .addRow((Object) null)
      .build();

  final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainer, inputOutcomes, inputContainer.get(0).getSchema());
  final StreamingAggregate streamAggrConfig = new StreamingAggregate(null, new ArrayList<>(),
      parseExprs("sum(id_left+cost_left)", "total_sum"));
  final StreamingAggBatch strAggBatch = new StreamingAggBatch(streamAggrConfig, mockInputBatch,
      operatorFixture.getFragmentContext());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(0, strAggBatch.getRecordCount());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(1, strAggBatch.getRecordCount());
  RowSet actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(0, strAggBatch.getRecordCount());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());

  nonEmptyInputRowSet2.clear();
  expectedRowSet.clear();
}
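With no group by, each EMIT window yields exactly one aggregate row even when the window is empty, and SUM over zero rows is null, which is why the same single-null-row expectedRowSet is verified twice; for the first window that row is delivered in the OK_NEW_SCHEMA output batch and the following EMIT carries zero records. The final EMIT window contains only (2, 20, "item2"), so its single output row holds sum 2 + 20 = 22, though the test only asserts the record count for that last batch.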
Use of org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch in project drill by apache.
The class TestStreamingAggEmitOutcome, method t15_testStreamingAgrResetsAfterFirstEmitOutcome.
/**
* Repeats t5_testStreamingAgrResetsAfterFirstEmitOutcome with no group by
*/
@Test
public void t15_testStreamingAgrResetsAfterFirstEmitOutcome() {
  final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema)
      .addRow(2, 20, "item2")
      .addRow(2, 20, "item2")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .addRow(3, 30, "item3")
      .build();
  final RowSet.SingleRowSet expectedRowSet1 = operatorFixture.rowSetBuilder(resultSchemaNoGroupBy)
      .addRow((long) 11)
      .build();
  final RowSet.SingleRowSet expectedRowSet2 = operatorFixture.rowSetBuilder(resultSchemaNoGroupBy)
      .addRow((long) 374)
      .build();

  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet.container());
  inputContainer.add(emptyInputRowSet.container());
  inputContainer.add(nonEmptyInputRowSet2.container());
  inputContainer.add(emptyInputRowSet.container());

  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
  inputOutcomes.add(RecordBatch.IterOutcome.OK);
  inputOutcomes.add(RecordBatch.IterOutcome.EMIT);

  final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext,
      inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
  final StreamingAggregate streamAggrConfig = new StreamingAggregate(null, new ArrayList<NamedExpression>(),
      parseExprs("sum(id_left+cost_left)", "total_sum"));
  final StreamingAggBatch strAggBatch = new StreamingAggBatch(streamAggrConfig, mockInputBatch,
      operatorFixture.getFragmentContext());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
  assertEquals(1, strAggBatch.getRecordCount());
  RowSet actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet1).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(0, strAggBatch.getRecordCount());

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.EMIT);
  assertEquals(1, strAggBatch.getRecordCount());
  actualRowSet = DirectRowSet.fromContainer(strAggBatch.getContainer());
  new RowSetComparison(expectedRowSet2).verify(actualRowSet);

  assertTrue(strAggBatch.next() == RecordBatch.IterOutcome.NONE);

  // Release memory for row sets
  nonEmptyInputRowSet2.clear();
  expectedRowSet2.clear();
  expectedRowSet1.clear();
}
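Worked sums for t15, assuming nonEmptyInputRowSet is the single row (1, 10, "item1"): the first EMIT window covers only that row, so sum(id_left + cost_left) = 11 (expectedRowSet1); the second window covers nonEmptyInputRowSet2, so 2 x (2 + 20) + 10 x (3 + 30) = 44 + 330 = 374 (expectedRowSet2). The zero-record EMIT asserted in the middle of the test marks the boundary of that first window before the aggregation state resets.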