Use of org.apache.drill.test.rowSet.RowSetComparison in project drill by apache.
The class TestRowBatchMerger, method testFlatWithNulls.
@Test
public void testFlatWithNulls() {

  // Create the first batch
  RowSetSource first = makeFirst();

  // Create null columns
  NullColumnBuilder builder = new NullBuilderBuilder().build();
  ResolvedRow resolvedTuple = new ResolvedRow(builder);
  resolvedTuple.add(new TestProjection(resolvedTuple, 1));
  resolvedTuple.add(resolvedTuple.nullBuilder().add("null1"));
  resolvedTuple.add(resolvedTuple.nullBuilder().add("null2",
      Types.optional(MinorType.VARCHAR)));
  resolvedTuple.add(new TestProjection(resolvedTuple, 0));

  // Build the null values
  ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
  builder.build(cache);
  builder.load(first.rowSet().rowCount());

  // Do the merge
  VectorContainer output = new VectorContainer(fixture.allocator());
  resolvedTuple.project(first.rowSet().container(), output);
  output.setRecordCount(first.rowSet().rowCount());
  RowSet result = fixture.wrap(output);

  // Verify
  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("null1", MinorType.INT)
      .addNullable("null2", MinorType.VARCHAR)
      .add("d", MinorType.VARCHAR)
      .buildSchema();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow(10, null, null, "barney")
      .addRow(20, null, null, "wilma")
      .build();
  new RowSetComparison(expected).verifyAndClearAll(result);
  builder.close();
}
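All of the examples on this page reduce to the same idiom: build an expected SingleRowSet against a known schema, then hand both row sets to RowSetComparison. A minimal sketch of that idiom, assuming an OperatorFixture named fixture and an actual RowSet named result are in scope (exact package paths vary across Drill versions):

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;

// Describe the schema the actual batch is expected to carry.
TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .add("d", MinorType.VARCHAR)
    .buildSchema();

// Build the expected rows against that schema.
SingleRowSet expected = fixture.rowSetBuilder(schema)
    .addRow(10, "barney")
    .addRow(20, "wilma")
    .build();

// Compare schema and values, then release both row sets' buffers.
new RowSetComparison(expected).verifyAndClearAll(result);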
Use of org.apache.drill.test.rowSet.RowSetComparison in project drill by apache.
The class TestScanLateSchema, method testLateSchemaLifecycle.
/**
 * Most basic test of a reader that discovers its schema as it goes along.
 * Validates the core life-cycle steps before trying more complex
 * variations.
 */
@Test
public void testLateSchemaLifecycle() {

  // Create a mock reader that returns two batches:
  // one schema-only, the other with data.
  ReaderCreator creator = negotiator -> {
    MockLateSchemaReader reader = new MockLateSchemaReader(negotiator);
    reader.batchLimit = 2;
    reader.returnDataOnFirst = false;
    return reader;
  };

  // Create the scan operator
  ScanFixture scanFixture = simpleFixture(creator);
  ScanOperatorExec scan = scanFixture.scanOp;

  // First batch: build the schema. The reader does not help; it returns
  // an empty first batch.
  assertTrue(scan.buildSchema());
  assertEquals(0, scan.batchAccessor().rowCount());

  // Create the expected result.
  SingleRowSet expected = makeExpected(20);
  RowSetComparison verifier = new RowSetComparison(expected);
  assertEquals(expected.batchSchema(), scan.batchAccessor().schema());

  // Next call: the reader returns the batch with data.
  assertTrue(scan.next());
  verifier.verifyAndClearAll(fixture.wrap(scan.batchAccessor().container()));

  // EOF
  assertFalse(scan.next());
  assertEquals(0, scan.batchAccessor().rowCount());
  scanFixture.close();
}
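For reference, a hypothetical sketch of what such a late-schema reader's next() might look like, assuming the EVF-style ManagedReader contract in which the negotiator is consumed in the constructor. The field names mirror those set on the mock above; makeBatch() is an assumed helper, not part of Drill's actual mock:

// Hypothetical late-schema reader body (a sketch, not Drill's actual mock).
@Override
public boolean next() {
  batchCount++;
  if (batchCount > batchLimit) {
    return false;                 // EOF: no more batches to offer
  }
  if (batchCount == 1 && !returnDataOnFirst) {
    // First call: the schema was published via the negotiator, but this
    // batch stays empty -- exactly the case scan.buildSchema() handles.
    return true;
  }
  makeBatch();                    // assumed helper that writes the data rows
  return true;
}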
Use of org.apache.drill.test.rowSet.RowSetComparison in project drill by apache.
The class TestCsvWithSchema, method testMultiFileSchemaMissingCol.
/**
 * Test the case in which a file does not contain a required column (here,
 * id in the third file). There are two choices: 1) fail the query, or
 * 2) muddle through as best we can. The scan framework chooses to muddle
 * through by assuming a default value of 0 for the missing int column.
 * <p>
 * Uses an ORDER BY to force a single batch in a known order. Assumes
 * the other ORDER BY tests pass.
 * <p>
 * This test shows that consistent types are sufficient for the sort
 * operator to work; the DAG will include a project operator that reorders
 * the columns when readers produce them in different orders. (Column
 * ordering is a rather abstract concept anyway in a columnar system such
 * as Drill.)
 */
@Test
public void testMultiFileSchemaMissingCol() throws Exception {
  RowSet expected = null;
  try {
    enableSchemaSupport();
    enableMultiScan();
    String tablePath = buildTable("schemaMissingCols", raggedMulti1Contents,
        reordered2Contents, multi3Contents);
    run(SCHEMA_SQL, tablePath);

    // Wildcard expands to union of schema + table. In this case
    // all table columns appear in the schema (though not all schema
    // columns appear in the table.)
    String sql = "SELECT id, `name`, `date`, gender, comment FROM " +
        tablePath + " ORDER BY id";
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("id", MinorType.INT)
        .add("name", MinorType.VARCHAR)
        .addNullable("date", MinorType.DATE)
        .add("gender", MinorType.VARCHAR)
        .add("comment", MinorType.VARCHAR)
        .buildSchema();
    expected = new RowSetBuilder(client.allocator(), expectedSchema)
        .addRow(0, "dino", LocalDate.of(2018, 9, 1), "NA", "ABC")
        .addRow(1, "wilma", LocalDate.of(2019, 1, 18), "female", "ABC")
        .addRow(2, "fred", LocalDate.of(2019, 1, 19), "male", "ABC")
        .addRow(3, "barney", LocalDate.of(2001, 1, 16), "NA", "ABC")
        .addRow(4, "betty", LocalDate.of(2019, 5, 4), "NA", "ABC")
        .build();
    for (int i = 0; i < 10; i++) {
      RowSet results = client.queryBuilder().sql(sql).rowSet();
      new RowSetComparison(expected).verifyAndClear(results);
    }
  } finally {
    expected.clear();
    resetSchemaSupport();
    resetMultiScan();
  }
}
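SCHEMA_SQL is a constant defined elsewhere in TestCsvWithSchema. A hedged sketch of the kind of statement it runs, using Drill's CREATE SCHEMA syntax; the column list and defaults below are assumptions inferred from the expected rows (the 'NA' and 'ABC' defaults, a non-nullable INT id that falls back to 0), not the actual constant:

// Hypothetical reconstruction of SCHEMA_SQL (the real definition lives in
// TestCsvWithSchema); column properties are inferred, not copied.
String schemaSql =
    "CREATE OR REPLACE SCHEMA (" +
    "  id INT NOT NULL, " +
    "  `date` DATE FORMAT 'yyyy-MM-dd', " +
    "  gender VARCHAR NOT NULL DEFAULT 'NA', " +
    "  comment VARCHAR NOT NULL DEFAULT 'ABC'" +
    ") FOR TABLE %s";
run(schemaSql, tablePath);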
Use of org.apache.drill.test.rowSet.RowSetComparison in project drill by apache.
The class TestLogReaderIssue, method testIssue7853UseValidDatetimeFormat.
@Test
public void testIssue7853UseValidDatetimeFormat() throws Exception {
  String sql = "SELECT type, `time` FROM `dfs.data`.`root/issue7853.log`";
  QueryBuilder builder = client.queryBuilder().sql(sql);
  RowSet sets = builder.rowSet();
  TupleMetadata schema = new SchemaBuilder()
      .addNullable("type", MinorType.VARCHAR)
      .addNullable("time", MinorType.TIMESTAMP)
      .buildSchema();
  RowSet expected = new RowSetBuilder(client.allocator(), schema)
      .addRow("h2", 1611446100664L)
      .addRow("h2", 1611446100666L)
      .build();
  new RowSetComparison(expected).verifyAndClearAll(sets);
}
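The expected time values are epoch milliseconds. A small sketch showing where such a literal comes from, assuming the log's timestamps are interpreted as UTC:

import java.time.Instant;

// 1611446100664L is 2021-01-23T23:55:00.664Z; deriving the literal from a
// readable instant keeps the expectation self-documenting (UTC assumed).
long ts = Instant.parse("2021-01-23T23:55:00.664Z").toEpochMilli();
assert ts == 1611446100664L;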
Use of org.apache.drill.test.rowSet.RowSetComparison in project drill by apache.
The class TestCsvWithSchema, method testMultiFileSchema.
/**
* Use a schema with explicit projection to get a consistent view
* of the table schema, even if columns are missing, rows are ragged,
* and column order changes.
* <p>
* Force the scans to occur in distinct fragments so the order of the
* file batches is random.
*/
@Test
public void testMultiFileSchema() throws Exception {
  RowSet expected1 = null;
  RowSet expected2 = null;
  try {
    enableSchemaSupport();
    enableMultiScan();
    String tablePath = buildTable("multiFileSchema", raggedMulti1Contents,
        reordered2Contents);
    run(SCHEMA_SQL, tablePath);

    // Wildcard expands to union of schema + table. In this case
    // all table columns appear in the schema (though not all schema
    // columns appear in the table.)
    String sql = "SELECT id, `name`, `date`, gender, comment FROM " + tablePath;
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("id", MinorType.INT)
        .add("name", MinorType.VARCHAR)
        .addNullable("date", MinorType.DATE)
        .add("gender", MinorType.VARCHAR)
        .add("comment", MinorType.VARCHAR)
        .buildSchema();
    expected1 = new RowSetBuilder(client.allocator(), expectedSchema)
        .addRow(1, "wilma", LocalDate.of(2019, 1, 18), "female", "ABC")
        .addRow(2, "fred", LocalDate.of(2019, 1, 19), "male", "ABC")
        .addRow(4, "betty", LocalDate.of(2019, 5, 4), "NA", "ABC")
        .build();
    expected2 = new RowSetBuilder(client.allocator(), expectedSchema)
        .addRow(3, "barney", LocalDate.of(2001, 1, 16), "NA", "ABC")
        .build();
    for (int i = 0; i < 10; i++) {
      boolean sawSchema = false;
      boolean sawFile1 = false;
      boolean sawFile2 = false;
      Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql).rowSetIterator();
      while (iter.hasNext()) {
        RowSet result = iter.next();
        if (result.rowCount() == 3) {
          sawFile1 = true;
          new RowSetComparison(expected1).verifyAndClear(result);
        } else if (result.rowCount() == 1) {
          sawFile2 = true;
          new RowSetComparison(expected2).verifyAndClear(result);
        } else {
          assertEquals(0, result.rowCount());
          sawSchema = true;
        }
      }
      assertTrue(!SCHEMA_BATCH_ENABLED || sawSchema);
      assertTrue(sawFile1);
      assertTrue(sawFile2);
    }
  } finally {
    expected1.clear();
    expected2.clear();
    resetSchemaSupport();
    resetMultiScan();
  }
}
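Note the two verification variants used on this page: verifyAndClearAll() releases both the expected and actual row sets, which suits single-shot tests, while verifyAndClear() releases only the actual set so one expected set can be reused across iterations, with an explicit clear() in the finally block. A minimal sketch of the reuse pattern, assuming the client, sql, and expected values from the tests above:

// Reuse one expected row set across repeated query runs: verifyAndClear()
// frees only the actual results, so `expected` survives each iteration.
RowSetComparison verifier = new RowSetComparison(expected);
try {
  for (int i = 0; i < 10; i++) {
    RowSet results = client.queryBuilder().sql(sql).rowSet();
    verifier.verifyAndClear(results);
  }
} finally {
  expected.clear();  // release the expected set once all runs are done
}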