use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class KuduRecordWriterImpl method updateSchema.
@Override
public void updateSchema(VectorAccessible batch) throws IOException {
  BatchSchema schema = batch.getSchema();
  int i = 0;
  try {
    if (!checkForTable(name)) {
      // Build one Kudu column per Drill field; the first column becomes the Kudu key.
      List<ColumnSchema> columns = new ArrayList<>();
      for (MaterializedField f : schema) {
        columns.add(new ColumnSchema.ColumnSchemaBuilder(f.getLastName(), getType(f.getType()))
            .nullable(f.getType().getMode() == DataMode.OPTIONAL)
            .key(i == 0)
            .build());
        i++;
      }
      Schema kuduSchema = new Schema(columns);
      table = client.createTable(name, kuduSchema, new CreateTableOptions());
    }
  } catch (Exception e) {
    throw new IOException(e);
  }
}
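The getType call above converts a Drill MajorType (org.apache.drill.common.types.TypeProtos.MajorType) into a Kudu column type (org.apache.kudu.Type). The plugin's actual mapping is not shown in this snippet; the following is a minimal sketch of what such a conversion could look like, and the specific pairings are assumptions rather than the plugin's exact choices.

// Hypothetical sketch of a Drill-to-Kudu type mapping; the pairings below
// are assumptions, not necessarily the ones KuduRecordWriterImpl uses.
private Type getType(MajorType majorType) {
  switch (majorType.getMinorType()) {
    case INT:       return Type.INT32;
    case BIGINT:    return Type.INT64;
    case FLOAT4:    return Type.FLOAT;
    case FLOAT8:    return Type.DOUBLE;
    case VARCHAR:   return Type.STRING;
    case VARBINARY: return Type.BINARY;
    case BIT:       return Type.BOOL;
    default:
      throw new UnsupportedOperationException("No Kudu type for " + majorType.getMinorType());
  }
}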
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class DumpCat method doQuery.
/**
 * Query mode:
 * $ drill-dumpcat --file=local:///tmp/drilltrace/[queryid]_[tag]_[majorid]_[minor]_[operator]
 *   Batches: 135
 *   Records: 53,214/53,214  // selected records / total records
 *   Selected Records: 53,214
 *   Average Record Size: 74 bytes
 *   Total Data Size: 12,345 bytes
 *   Number of Empty Batches: 1
 *   Schema changes: 1
 *   Schema change batch indices: 0
 * @throws Exception
 */
protected void doQuery(FileInputStream input) throws Exception {
  int batchNum = 0;
  int emptyBatchNum = 0;
  BatchSchema prevSchema = null;
  final List<Integer> schemaChangeIdx = Lists.newArrayList();
  final BatchMetaInfo aggBatchMetaInfo = new BatchMetaInfo();
  while (input.available() > 0) {
    final VectorAccessibleSerializable vcSerializable = new VectorAccessibleSerializable(DumpCat.allocator);
    vcSerializable.readFromStream(input);
    final VectorContainer vectorContainer = (VectorContainer) vcSerializable.get();
    aggBatchMetaInfo.add(getBatchMetaInfo(vcSerializable));
    if (vectorContainer.getRecordCount() == 0) {
      emptyBatchNum++;
    }
    // Record this batch's index whenever its schema differs from the previous batch's.
    if (prevSchema != null && !vectorContainer.getSchema().equals(prevSchema)) {
      schemaChangeIdx.add(batchNum);
    }
    prevSchema = vectorContainer.getSchema();
    batchNum++;
    vectorContainer.zeroVectors();
  }
  /* output the summary stats */
  System.out.println(String.format("Total # of batches: %d", batchNum));
  // output: rows, selected rows, average record size, total data size
  System.out.println(aggBatchMetaInfo.toString());
  System.out.println(String.format("Empty batch : %d", emptyBatchNum));
  System.out.println(String.format("Schema changes : %d", schemaChangeIdx.size()));
  System.out.println(String.format("Schema change batch index : %s", schemaChangeIdx.toString()));
}
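The schema-change counter above hinges on BatchSchema.equals(), which compares the batch's columns (name, type, and data mode) and its selection-vector mode. A minimal illustration using the same SchemaBuilder that appears in the tests below; the column name here is arbitrary.

// Two schemas that differ only in data mode (REQUIRED vs. OPTIONAL) are not
// equal, so a transition between them counts as a schema change in doQuery.
BatchSchema requiredA = new SchemaBuilder()
    .add("a", TypeProtos.MinorType.BIGINT)          // REQUIRED mode
    .build();
BatchSchema nullableA = new SchemaBuilder()
    .addNullable("a", TypeProtos.MinorType.BIGINT)  // OPTIONAL mode
    .build();
assert !requiredA.equals(nullableA);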
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class TestMiniPlan method testUnionFilter.
@Test
public void testUnionFilter() throws Exception {
  List<String> leftJsonBatches = Lists.newArrayList(
      "[{\"a\": 5, \"b\" : 1 }]",
      "[{\"a\": 5, \"b\" : 5},{\"a\": 3, \"b\" : 8}]",
      "[{\"a\": 40, \"b\" : 3},{\"a\": 13, \"b\" : 100}]");
  List<String> rightJsonBatches = Lists.newArrayList(
      "[{\"a\": 5, \"b\" : 10 }]",
      "[{\"a\": 50, \"b\" : 100}]");
  // The UnionAll children list is provided through the RecordBatch inputs below.
  RecordBatch batch = new PopBuilder()
      .physicalOperator(new UnionAll(Collections.EMPTY_LIST))
      .addInputAsChild()
        .physicalOperator(new Filter(null, parseExpr("a=5"), 1.0f))
        .addJsonScanAsChild()
          .jsonBatches(leftJsonBatches)
          .columnsToRead("a", "b")
          .buildAddAsInput()
        .buildAddAsInput()
      .addInputAsChild()
        .physicalOperator(new Filter(null, parseExpr("a=50"), 1.0f))
        .addJsonScanAsChild()
          .jsonBatches(rightJsonBatches)
          .columnsToRead("a", "b")
          .buildAddAsInput()
        .buildAddAsInput()
      .build();
  BatchSchema expectedSchema = new SchemaBuilder()
      .addNullable("a", TypeProtos.MinorType.BIGINT)
      .addNullable("b", TypeProtos.MinorType.BIGINT)
      .withSVMode(BatchSchema.SelectionVectorMode.NONE)
      .build();
  new MiniPlanTestBuilder()
      .root(batch)
      .expectedSchema(expectedSchema)
      .baselineValues(5L, 1L)
      .baselineValues(5L, 5L)
      .baselineValues(50L, 100L)
      .go();
}
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class TestMiniPlan method testSimpleParquetScan.
@Test
public void testSimpleParquetScan() throws Exception {
  String file = FileUtils.getResourceAsFile("/tpchmulti/region/01.parquet").toURI().toString();
  RecordBatch scanBatch = new ParquetScanBuilder()
      .fileSystem(fs)
      .columnsToRead("R_REGIONKEY")
      .inputPaths(Lists.newArrayList(file))
      .build();
  BatchSchema expectedSchema = new SchemaBuilder()
      .add("R_REGIONKEY", TypeProtos.MinorType.BIGINT)
      .build();
  new MiniPlanTestBuilder()
      .root(scanBatch)
      .expectedSchema(expectedSchema)
      .baselineValues(0L)
      .baselineValues(1L)
      .go();
}
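The builder chain extends naturally to more columns. A hypothetical variant that also reads the region name; treating R_NAME as a required VARCHAR is an assumption about how Drill types this TPC-H column, not something the original test asserts.

// Hypothetical: scan two columns from the same TPC-H region file.
RecordBatch scanBatch = new ParquetScanBuilder()
    .fileSystem(fs)
    .columnsToRead("R_REGIONKEY", "R_NAME")
    .inputPaths(Lists.newArrayList(file))
    .build();
BatchSchema expectedSchema = new SchemaBuilder()
    .add("R_REGIONKEY", TypeProtos.MinorType.BIGINT)
    .add("R_NAME", TypeProtos.MinorType.VARCHAR)
    .build();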
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class TestBatchValidator method testValidRepeated.
@Test
public void testValidRepeated() {
  BatchSchema schema = new SchemaBuilder()
      .add("a", MinorType.INT, DataMode.REPEATED)
      .add("b", MinorType.VARCHAR, DataMode.REPEATED)
      .build();
  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .add(new int[] {}, new String[] {})
      .add(new int[] { 1, 2, 3 }, new String[] { "fred", "barney", "wilma" })
      .add(new int[] { 4 }, new String[] { "dino" })
      .build();
  BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
  validator.validate();
  assertTrue(validator.errors().isEmpty());
  batch.clear();
}
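For repeated vectors, a central validity condition is that the offset vector starts at zero and never decreases, with offsets[i + 1] - offsets[i] giving row i's entry count. A self-contained sketch of that idea using plain arrays rather than Drill's actual vector classes (BatchValidator's own checks are more extensive):

// Conceptual version of an offset-vector check for a repeated vector.
static boolean offsetsValid(int[] offsets, int rowCount) {
  if (offsets.length < rowCount + 1 || offsets[0] != 0) {
    return false;  // must have rowCount + 1 entries and start at zero
  }
  for (int i = 0; i < rowCount; i++) {
    if (offsets[i + 1] < offsets[i]) {
      return false;  // offsets must be monotonically non-decreasing
    }
  }
  return true;
}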