Example use of org.apache.drill.exec.physical.impl.scan.ScanOperatorExec from the Apache Drill project.
Taken from class TestScanLateSchema, method testLateSchemaLifecycleNoFile.
/**
 * Verify the late-schema lifecycle for a source that carries no file
 * information (similar to a Hive or JDBC data source).
 */
@Test
public void testLateSchemaLifecycleNoFile() {

  // Mock reader yields two batches: the first carries only schema,
  // the second carries the data rows.
  ReaderCreator readerCreator = negotiator -> {
    MockLateSchemaReader mockReader = new MockLateSchemaReader(negotiator);
    mockReader.batchLimit = 2;
    mockReader.returnDataOnFirst = false;
    return mockReader;
  };
  ScanFixture fixtureUnderTest = simpleFixture(readerCreator);
  ScanOperatorExec scanOp = fixtureUnderTest.scanOp;

  // Schema discovery: the reader cooperates by handing back an
  // empty first batch, so the scan reports schema with zero rows.
  assertTrue(scanOp.buildSchema());
  assertEquals(0, scanOp.batchAccessor().rowCount());

  // Build the batch we expect the reader to produce.
  SingleRowSet expectedRows = makeExpected(20);
  RowSetComparison comparison = new RowSetComparison(expectedRows);
  assertEquals(expectedRows.batchSchema(), scanOp.batchAccessor().schema());

  // The following call delivers the actual data batch.
  assertTrue(scanOp.next());
  comparison.verifyAndClearAll(fixture.wrap(scanOp.batchAccessor().container()));

  // End of input: no further batches, no rows.
  assertFalse(scanOp.next());
  assertEquals(0, scanOp.batchAccessor().rowCount());
  fixtureUnderTest.close();
}
Example use of org.apache.drill.exec.physical.impl.scan.ScanOperatorExec from the Apache Drill project.
Taken from class TestScanLateSchema, method testNonEmptyFirstBatch.
/**
 * Exercise the case in which the reader skips the "first batch holds
 * only schema" convention and returns rows immediately. The scan
 * operator must then split that first batch in two: a schema-only
 * batch followed by one holding the data.
 */
@Test
public void testNonEmptyFirstBatch() {
  ReaderCreator readerCreator = negotiator -> {
    MockLateSchemaReader mockReader = new MockLateSchemaReader(negotiator);
    mockReader.batchLimit = 2;
    mockReader.returnDataOnFirst = true;
    return mockReader;
  };
  ScanFixture fixtureUnderTest = simpleFixture(readerCreator);
  ScanOperatorExec scanOp = fixtureUnderTest.scanOp;

  // The reader hands back a batch that already contains rows; the
  // scan operator peels off the schema and reports it with no data.
  assertTrue(scanOp.buildSchema());
  SingleRowSet expectedRows = makeExpected();
  assertEquals(expectedRows.batchSchema(), scanOp.batchAccessor().schema());
  assertEquals(0, scanOp.batchAccessor().rowCount());
  scanOp.batchAccessor().release();

  // Second batch: the look-ahead rows captured during schema discovery.
  assertTrue(scanOp.next());
  RowSetUtilities.verify(expectedRows, fixture.wrap(scanOp.batchAccessor().container()));

  // Third batch follows the normal read path.
  assertTrue(scanOp.next());
  RowSetUtilities.verify(makeExpected(20), fixture.wrap(scanOp.batchAccessor().container()));

  // End of input.
  assertFalse(scanOp.next());
  assertEquals(0, scanOp.batchAccessor().rowCount());
  fixtureUnderTest.close();
}
Example use of org.apache.drill.exec.physical.impl.scan.ScanOperatorExec from the Apache Drill project.
Taken from class TestScanLateSchema, method testLateSchemaEarlyReaderClose.
/**
 * Verify that a late-schema reader closed right after schema discovery,
 * before any call to next(), shuts down cleanly.
 */
@Test
public void testLateSchemaEarlyReaderClose() {

  // Mock reader configured for two batches: schema-only, then data.
  // An anonymous ObservableCreator is used (rather than a lambda) so
  // the created reader can be retrieved afterwards via reader().
  ObservableCreator readerCreator = new ObservableCreator() {
    @Override
    public ManagedReader create(SchemaNegotiator negotiator) {
      MockLateSchemaReader mockReader = new MockLateSchemaReader(negotiator);
      mockReader.batchLimit = 2;
      mockReader.returnDataOnFirst = false;
      return mockReader;
    }
  };
  ScanFixture fixtureUnderTest = simpleFixture(readerCreator);
  ScanOperatorExec scanOp = fixtureUnderTest.scanOp;

  // Discover the schema, then close immediately; no look-ahead
  // batch should have been created.
  assertTrue(scanOp.buildSchema());
  fixtureUnderTest.close();

  MockLateSchemaReader mockReader = readerCreator.reader();
  assertEquals(1, mockReader.batchCount);
  assertTrue(mockReader.closeCalled);
}
Example use of org.apache.drill.exec.physical.impl.scan.ScanOperatorExec from the Apache Drill project.
Taken from class MockScanBatchCreator, method extendedMockScan.
/**
 * Builds a record batch for an extended mock scan: one batch reader per
 * scan entry, wrapped in a managed scan framework with a wildcard
 * projection and a bounded batch size.
 */
private CloseableRecordBatch extendedMockScan(FragmentContext context, MockSubScanPOP config, List<MockScanEntry> entries) {

  // Project every column (wildcard projection).
  List<SchemaPath> projection = new LinkedList<>();
  projection.add(SchemaPath.STAR_COLUMN);

  // Instantiate all batch readers eagerly. Fine for the one or two
  // readers used here; larger scans would create them lazily via an
  // iterator instead.
  final List<ManagedReader<SchemaNegotiator>> batchReaders = new LinkedList<>();
  for (final MockTableDef.MockScanEntry entry : entries) {
    batchReaders.add(new ExtendedMockBatchReader(entry));
  }

  // Batch size defaults to 10 MB unless the first scan entry
  // specifies a positive override.
  final MockTableDef.MockScanEntry firstEntry = entries.get(0);
  final int batchSizeBytes = firstEntry.getBatchSize() > 0
      ? firstEntry.getBatchSize()
      : 10 * 1024 * 1024;

  // Configure the scan framework; each reader may still shrink its
  // own batches below this limit if it wishes.
  ScanFrameworkBuilder builder = new ScanFrameworkBuilder();
  builder.batchByteLimit(batchSizeBytes);
  builder.projection(projection);
  builder.setReaderFactory(new BasicScanFactory(batchReaders.iterator()));
  return new OperatorRecordBatch(context, config,
      new ScanOperatorExec(new ManagedScanFramework(builder), false), false);
}
Aggregations