Search in sources :

Example 26 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class KuduRecordWriterImpl method updateSchema.

/**
 * Creates the table for this writer from the incoming batch's schema when
 * {@code checkForTable(name)} returns {@code false}; otherwise nothing is created.
 *
 * <p>Every field of the batch schema becomes one column. A column is nullable
 * when its field's data mode is {@code OPTIONAL}, and only the first field is
 * flagged as a key column.
 *
 * @param batch the batch whose schema is translated into table columns
 * @throws IOException wrapping any exception raised while checking for or creating the table
 */
@Override
public void updateSchema(VectorAccessible batch) throws IOException {
    final BatchSchema batchSchema = batch.getSchema();
    try {
        if (checkForTable(name)) {
            // checkForTable returned true: no table is created.
            return;
        }
        final List<ColumnSchema> kuduColumns = new ArrayList<>();
        // Only the very first field of the schema is marked as a key column.
        boolean firstField = true;
        for (MaterializedField field : batchSchema) {
            kuduColumns.add(
                    new ColumnSchema.ColumnSchemaBuilder(field.getLastName(), getType(field.getType()))
                            .nullable(field.getType().getMode() == DataMode.OPTIONAL)
                            .key(firstField)
                            .build());
            firstField = false;
        }
        table = client.createTable(name, new Schema(kuduColumns), new CreateTableOptions());
    } catch (Exception e) {
        // Callers only see IOException; the original failure is kept as the cause.
        throw new IOException(e);
    }
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) Schema(org.apache.kudu.Schema) ColumnSchema(org.apache.kudu.ColumnSchema) ArrayList(java.util.ArrayList) MaterializedField(org.apache.drill.exec.record.MaterializedField) CreateTableOptions(org.apache.kudu.client.CreateTableOptions) IOException(java.io.IOException) UserException(org.apache.drill.common.exceptions.UserException)

Example 27 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class DumpCat method doQuery.

/**
 * Query mode: reads serialized batches from {@code input} while bytes remain
 * available and prints a summary to standard output.
 *
 * <p>Example invocation:
 * {@code $drill-dumpcat --file=local:///tmp/drilltrace/[queryid]_[tag]_[majorid]_[minor]_[operator]}
 *
 * <p>The summary lines are printed in this order:
 * <ol>
 *   <li>total number of batches read;</li>
 *   <li>the aggregated {@code BatchMetaInfo} via its {@code toString()}
 *       (rows, selected rows, average record size, total data size);</li>
 *   <li>number of batches whose record count is zero;</li>
 *   <li>number of schema changes;</li>
 *   <li>indices of the batches whose schema differs from the previous batch's schema.</li>
 * </ol>
 *
 * @param input stream to read the serialized batches from
 * @throws Exception any failure raised while reading or inspecting a batch is propagated
 */
protected void doQuery(FileInputStream input) throws Exception {
    final BatchMetaInfo totals = new BatchMetaInfo();
    final List<Integer> changedAt = Lists.newArrayList();
    BatchSchema lastSchema = null;
    int emptyBatches = 0;
    int batchIndex = 0;
    for (; input.available() > 0; batchIndex++) {
        final VectorAccessibleSerializable reader = new VectorAccessibleSerializable(DumpCat.allocator);
        reader.readFromStream(input);
        final VectorContainer container = (VectorContainer) reader.get();
        totals.add(getBatchMetaInfo(reader));
        if (container.getRecordCount() == 0) {
            emptyBatches++;
        }
        // The first batch has nothing to compare against, so it never counts as a change.
        final BatchSchema currentSchema = container.getSchema();
        if (lastSchema != null && !currentSchema.equals(lastSchema)) {
            changedAt.add(batchIndex);
        }
        lastSchema = currentSchema;
        container.zeroVectors();
    }
    /* output the summary stat */
    System.out.println(String.format("Total # of batches: %d", batchIndex));
    // output: rows, selectedRows, avg rec size, total data size.
    System.out.println(totals);
    System.out.println(String.format("Empty batch : %d", emptyBatches));
    System.out.println(String.format("Schema changes : %d", changedAt.size()));
    System.out.println(String.format("Schema change batch index : %s", changedAt));
}
Also used : VectorAccessibleSerializable(org.apache.drill.exec.cache.VectorAccessibleSerializable) BatchSchema(org.apache.drill.exec.record.BatchSchema) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 28 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class TestMiniPlan method testUnionFilter.

/**
 * Builds a mini plan of a UnionAll over two filtered JSON scans (left filter
 * {@code a=5}, right filter {@code a=50}) and checks the result: nullable
 * BIGINT columns {@code a} and {@code b}, no selection vector, and the rows
 * (5, 1), (5, 5) and (50, 100).
 */
@Test
public void testUnionFilter() throws Exception {
    final List<String> leftInput = Lists.newArrayList(
            "[{\"a\": 5, \"b\" : 1 }]",
            "[{\"a\": 5, \"b\" : 5},{\"a\": 3, \"b\" : 8}]",
            "[{\"a\": 40, \"b\" : 3},{\"a\": 13, \"b\" : 100}]");
    final List<String> rightInput = Lists.newArrayList(
            "[{\"a\": 5, \"b\" : 10 }]",
            "[{\"a\": 50, \"b\" : 100}]");

    final RecordBatch unionBatch = new PopBuilder()
            // Children list is provided through RecordBatch
            .physicalOperator(new UnionAll(Collections.EMPTY_LIST))
            // Left input: filter a=5 over the left JSON batches.
            .addInputAsChild()
            .physicalOperator(new Filter(null, parseExpr("a=5"), 1.0f))
            .addJsonScanAsChild()
            .jsonBatches(leftInput)
            .columnsToRead("a", "b")
            .buildAddAsInput()
            .buildAddAsInput()
            // Right input: filter a=50 over the right JSON batches.
            .addInputAsChild()
            .physicalOperator(new Filter(null, parseExpr("a=50"), 1.0f))
            .addJsonScanAsChild()
            .jsonBatches(rightInput)
            .columnsToRead("a", "b")
            .buildAddAsInput()
            .buildAddAsInput()
            .build();

    final BatchSchema wantedSchema = new SchemaBuilder()
            .addNullable("a", TypeProtos.MinorType.BIGINT)
            .addNullable("b", TypeProtos.MinorType.BIGINT)
            .withSVMode(BatchSchema.SelectionVectorMode.NONE)
            .build();

    new MiniPlanTestBuilder()
            .root(unionBatch)
            .expectedSchema(wantedSchema)
            .baselineValues(5L, 1L)
            .baselineValues(5L, 5L)
            .baselineValues(50L, 100L)
            .go();
}
Also used : Filter(org.apache.drill.exec.physical.config.Filter) BatchSchema(org.apache.drill.exec.record.BatchSchema) RecordBatch(org.apache.drill.exec.record.RecordBatch) SchemaBuilder(org.apache.drill.test.rowSet.SchemaBuilder) UnionAll(org.apache.drill.exec.physical.config.UnionAll) Test(org.junit.Test)

Example 29 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class TestMiniPlan method testSimpleParquetScan.

/**
 * Scans the {@code R_REGIONKEY} column of the resource file
 * {@code /tpchmulti/region/01.parquet} and checks that the result has a
 * single BIGINT column with the baseline values 0 and 1.
 */
@Test
public void testSimpleParquetScan() throws Exception {
    final String regionFileUri = FileUtils.getResourceAsFile("/tpchmulti/region/01.parquet").toURI().toString();

    final RecordBatch parquetScan = new ParquetScanBuilder()
            .fileSystem(fs)
            .columnsToRead("R_REGIONKEY")
            .inputPaths(Lists.newArrayList(regionFileUri))
            .build();

    final BatchSchema regionKeySchema = new SchemaBuilder()
            .add("R_REGIONKEY", TypeProtos.MinorType.BIGINT)
            .build();

    new MiniPlanTestBuilder()
            .root(parquetScan)
            .expectedSchema(regionKeySchema)
            .baselineValues(0L)
            .baselineValues(1L)
            .go();
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) RecordBatch(org.apache.drill.exec.record.RecordBatch) SchemaBuilder(org.apache.drill.test.rowSet.SchemaBuilder) Test(org.junit.Test)

Example 30 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class TestBatchValidator method testValidRepeated.

/**
 * Validates a row set with two repeated columns ({@code a}: INT, {@code b}:
 * VARCHAR) holding an empty row, a three-element row and a one-element row,
 * and expects the validator to report no errors.
 */
@Test
public void testValidRepeated() {
    BatchSchema schema = new SchemaBuilder()
            .add("a", MinorType.INT, DataMode.REPEATED)
            .add("b", MinorType.VARCHAR, DataMode.REPEATED)
            .build();
    SingleRowSet batch = fixture.rowSetBuilder(schema)
            .add(new int[] {}, new String[] {})
            .add(new int[] { 1, 2, 3 }, new String[] { "fred", "barney", "wilma" })
            .add(new int[] { 4 }, new String[] { "dino" })
            .build();
    try {
        BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
        validator.validate();
        assertTrue(validator.errors().isEmpty());
    } finally {
        // Clear the row set even when validation throws or the assertion fails,
        // so a failing test does not skip its cleanup.
        batch.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.test.rowSet.SchemaBuilder) BatchValidator(org.apache.drill.exec.physical.impl.validate.BatchValidator) Test(org.junit.Test)

Aggregations

BatchSchema (org.apache.drill.exec.record.BatchSchema)39 SchemaBuilder (org.apache.drill.test.rowSet.SchemaBuilder)26 Test (org.junit.Test)20 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)18 BatchValidator (org.apache.drill.exec.physical.impl.validate.BatchValidator)10 RowSetReader (org.apache.drill.test.rowSet.RowSet.RowSetReader)8 MaterializedField (org.apache.drill.exec.record.MaterializedField)7 ValueVector (org.apache.drill.exec.vector.ValueVector)6 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)4 RecordBatch (org.apache.drill.exec.record.RecordBatch)4 VectorAccessible (org.apache.drill.exec.record.VectorAccessible)4 VectorContainer (org.apache.drill.exec.record.VectorContainer)4 ArrayList (java.util.ArrayList)3 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)3 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)3 DrillBuf (io.netty.buffer.DrillBuf)2 IOException (java.io.IOException)2 UserException (org.apache.drill.common.exceptions.UserException)2 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)2 MinorFragmentEndpoint (org.apache.drill.exec.physical.MinorFragmentEndpoint)2