Search in sources :

Example 1 with TopN

use of org.apache.drill.exec.physical.config.TopN in project drill by axbaretto.

the class BasicPhysicalOpUnitTest method testTopN.

@Test
public void testTopN() {
    TopN sortConf = new TopN(null, Lists.newArrayList(ordering("b", RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.FIRST)), false, 3);
    List<String> inputJsonBatches = Lists.newArrayList("[{\"a\": 5, \"b\" : 1 }]", "[{\"a\": 5, \"b\" : 5},{\"a\": 3, \"b\" : 8}]", "[{\"a\": 40, \"b\" : 3},{\"a\": 13, \"b\" : 100}]");
    opTestBuilder().physicalOperator(sortConf).inputDataStreamJson(inputJsonBatches).baselineColumns("a", "b").baselineValues(5l, 1l).baselineValues(40l, 3l).baselineValues(5l, 5l).go();
}
Also used : TopN(org.apache.drill.exec.physical.config.TopN) Test(org.junit.Test)

Example 2 with TopN

use of org.apache.drill.exec.physical.config.TopN in project drill by apache.

the class TestTopNEmitOutcome method testTopNMultipleOutputBatchWithLowerLimits.

/**
 * Verifies that if TopNBatch receives multiple non-empty record batch with EMIT outcome in between then it produces
 * output for those input batch correctly. In this case it receives first non-empty batch with OK_NEW_SCHEMA in
 * buildSchema phase followed by an empty batch with EMIT outcome. For this combination it produces output for the
 * record received so far along with EMIT outcome. Then it receives second non-empty batch with OK outcome and
 * produces output for it differently. The test validates that for each output received the order of the records are
 * correct
 */
@Test
public void testTopNMultipleOutputBatchWithLowerLimits() {
    final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema).addRow(4, 40, "item4").addRow(2, 20, "item2").addRow(5, 50, "item5").addRow(3, 30, "item3").build();
    final RowSet.SingleRowSet expectedRowSet1 = operatorFixture.rowSetBuilder(inputSchema).addRow(1, 10, "item1").build();
    final RowSet.SingleRowSet expectedRowSet2 = operatorFixture.rowSetBuilder(inputSchema).addRow(5, 50, "item5").build();
    inputContainer.add(nonEmptyInputRowSet.container());
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(nonEmptyInputRowSet2.container());
    inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.OK);
    final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
    final TopN topNConfig = new TopN(null, Lists.newArrayList(ordering("id_left", RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.FIRST)), false, 1);
    final TopNBatch topNBatch = new TopNBatch(topNConfig, operatorFixture.getFragmentContext(), mockInputBatch);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertEquals(1, topNBatch.getRecordCount());
    // verify results with baseline
    RowSet actualRowSet1 = HyperRowSetImpl.fromContainer(topNBatch.getContainer(), topNBatch.getSelectionVector4());
    new RowSetComparison(expectedRowSet1).verify(actualRowSet1);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertEquals(0, topNBatch.getRecordCount());
    // State refresh happens and limit again works on new data batches
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK);
    assertEquals(1, topNBatch.getRecordCount());
    // verify results with baseline
    RowSet actualRowSet2 = HyperRowSetImpl.fromContainer(topNBatch.getContainer(), topNBatch.getSelectionVector4());
    new RowSetComparison(expectedRowSet2).verify(actualRowSet2);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.NONE);
    // Release memory for row sets
    nonEmptyInputRowSet2.clear();
    expectedRowSet2.clear();
    expectedRowSet1.clear();
}
Also used : RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) TopN(org.apache.drill.exec.physical.config.TopN) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Example 3 with TopN

use of org.apache.drill.exec.physical.config.TopN in project drill by apache.

the class TestTopNEmitOutcome method testTopNNonEmptyBatchEmitOutcome.

/**
 * Verifies that if TopNBatch receives a RecordBatch with EMIT outcome post build schema phase then it produces
 * output for those input batch correctly. The first output batch will always be returned with OK_NEW_SCHEMA
 * outcome followed by EMIT with empty batch. The test verifies the output order with the expected baseline.
 */
@Test
public void testTopNNonEmptyBatchEmitOutcome() {
    final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema).addRow(2, 20, "item2").addRow(13, 130, "item13").addRow(4, 40, "item4").build();
    final RowSet.SingleRowSet expectedRowSet = operatorFixture.rowSetBuilder(inputSchema).addRow(13, 130, "item13").addRow(4, 40, "item4").addRow(2, 20, "item2").build();
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(nonEmptyInputRowSet2.container());
    inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
    final TopN topNConfig = new TopN(null, Lists.newArrayList(ordering("id_left", RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.FIRST)), false, 10);
    final TopNBatch topNBatch = new TopNBatch(topNConfig, operatorFixture.getFragmentContext(), mockInputBatch);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    outputRecordCount += topNBatch.getRecordCount();
    assertEquals(0, outputRecordCount);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    outputRecordCount += topNBatch.getRecordCount();
    assertEquals(3, outputRecordCount);
    // verify results
    RowSet actualRowSet = HyperRowSetImpl.fromContainer(topNBatch.getContainer(), topNBatch.getSelectionVector4());
    new RowSetComparison(expectedRowSet).verify(actualRowSet);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    outputRecordCount += topNBatch.getRecordCount();
    assertEquals(3, outputRecordCount);
    // Release memory for row sets
    nonEmptyInputRowSet2.clear();
    expectedRowSet.clear();
}
Also used : RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) TopN(org.apache.drill.exec.physical.config.TopN) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Example 4 with TopN

use of org.apache.drill.exec.physical.config.TopN in project drill by apache.

the class TestTopNEmitOutcome method testTopNMultipleEmptyBatchFollowedByNonEmptyBatchEmitOutcome.

@Test
public void testTopNMultipleEmptyBatchFollowedByNonEmptyBatchEmitOutcome() {
    final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema).addRow(2, 20, "item2").addRow(13, 130, "item13").addRow(4, 40, "item4").build();
    final RowSet.SingleRowSet expectedRowSet = operatorFixture.rowSetBuilder(inputSchema).addRow(13, 130, "item13").addRow(4, 40, "item4").addRow(2, 20, "item2").build();
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(nonEmptyInputRowSet2.container());
    inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
    final TopN topNConfig = new TopN(null, Lists.newArrayList(ordering("id_left", RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.FIRST)), false, 10);
    final TopNBatch topNBatch = new TopNBatch(topNConfig, operatorFixture.getFragmentContext(), mockInputBatch);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    outputRecordCount += topNBatch.getRecordCount();
    assertEquals(0, outputRecordCount);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    outputRecordCount += topNBatch.getRecordCount();
    assertEquals(3, outputRecordCount);
    // verify results
    RowSet actualRowSet = HyperRowSetImpl.fromContainer(topNBatch.getContainer(), topNBatch.getSelectionVector4());
    new RowSetComparison(expectedRowSet).verify(actualRowSet);
    // Release memory for row sets
    nonEmptyInputRowSet2.clear();
    expectedRowSet.clear();
}
Also used : RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) TopN(org.apache.drill.exec.physical.config.TopN) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Example 5 with TopN

use of org.apache.drill.exec.physical.config.TopN in project drill by apache.

the class TestTopNEmitOutcome method testTopNMultipleEMITOutcome.

@Test
public void testTopNMultipleEMITOutcome() {
    final RowSet.SingleRowSet nonEmptyInputRowSet2 = operatorFixture.rowSetBuilder(inputSchema).addRow(2, 20, "item2").addRow(3, 30, "item3").build();
    inputContainer.add(nonEmptyInputRowSet.container());
    inputContainer.add(emptyInputRowSet.container());
    inputContainer.add(nonEmptyInputRowSet2.container());
    inputContainer.add(emptyInputRowSet.container());
    inputOutcomes.add(RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    inputOutcomes.add(RecordBatch.IterOutcome.EMIT);
    final MockRecordBatch mockInputBatch = new MockRecordBatch(operatorFixture.getFragmentContext(), opContext, inputContainer, inputOutcomes, emptyInputRowSet.container().getSchema());
    final TopN topNConfig = new TopN(null, Lists.newArrayList(ordering("id_left", RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.FIRST)), false, 10);
    final TopNBatch topNBatch = new TopNBatch(topNConfig, operatorFixture.getFragmentContext(), mockInputBatch);
    // first limit evaluation
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.OK_NEW_SCHEMA);
    assertEquals(1, topNBatch.getRecordCount());
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertEquals(0, topNBatch.getRecordCount());
    // After seeing EMIT limit will refresh it's state and again evaluate limit on next set of input batches
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertEquals(2, topNBatch.getRecordCount());
    assertTrue(topNBatch.next() == RecordBatch.IterOutcome.EMIT);
    assertEquals(0, topNBatch.getRecordCount());
    nonEmptyInputRowSet2.clear();
}
Also used : RowSet(org.apache.drill.exec.physical.rowSet.RowSet) MockRecordBatch(org.apache.drill.exec.physical.impl.MockRecordBatch) TopN(org.apache.drill.exec.physical.config.TopN) OperatorTest(org.apache.drill.categories.OperatorTest) Test(org.junit.Test)

Aggregations

TopN (org.apache.drill.exec.physical.config.TopN)17 Test (org.junit.Test)15 OperatorTest (org.apache.drill.categories.OperatorTest)13 MockRecordBatch (org.apache.drill.exec.physical.impl.MockRecordBatch)13 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)10 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)9 PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator)2