Search in sources :

Example 21 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class SortRecordBatchBuilder method getHeldRecordBatches.

/**
 * Hands back the containers of every batch currently held in {@code batches} and then
 * empties {@code batches}.
 *
 * <p>Before a container is collected, its record count is set to the record count reported by
 * the {@code RecordBatchData} that owns it.
 *
 * @return the containers of all held batches, visited schema by schema in the iteration order
 *         of {@code batches.keySet()}
 */
public List<VectorContainer> getHeldRecordBatches() {
    final List<VectorContainer> held = Lists.newArrayList();
    for (final BatchSchema schemaKey : batches.keySet()) {
        for (final RecordBatchData data : batches.get(schemaKey)) {
            final VectorContainer container = data.getContainer();
            container.setRecordCount(data.getRecordCount());
            held.add(container);
        }
    }
    // The containers now belong to the caller; drop our references to the batch data.
    batches.clear();
    return held;
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 22 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class MergingRecordBatch method isSameSchemaAmongBatches.

/**
 * Tells whether every loader in {@code batchLoaders} reports a schema equal to the schema of
 * the first loader.
 *
 * <p>The first mismatch found is logged at error level and ends the scan.
 *
 * @param batchLoaders loaders to compare; must contain at least one element (checked through
 *                     {@code Preconditions.checkArgument})
 * @return {@code true} when all schemas are equal to the first one, {@code false} otherwise
 */
private boolean isSameSchemaAmongBatches(final RecordBatchLoader[] batchLoaders) {
    Preconditions.checkArgument(batchLoaders.length > 0, "0 batch is not allowed!");
    final BatchSchema reference = batchLoaders[0].getSchema();
    for (int idx = 1; idx < batchLoaders.length; idx++) {
        final BatchSchema candidate = batchLoaders[idx].getSchema();
        if (reference.equals(candidate)) {
            continue;
        }
        logger.error("Schemas are different. Schema 1 : " + reference + ", Schema 2: " + candidate);
        return false;
    }
    return true;
}
Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint)

Example 23 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class DrillTestWrapper method addToCombinedVectorResults.

/**
 * Appends the values of every batch to per-column result lists, optionally comparing each
 * batch's schema against an expected schema while iterating.
 *
 * <p>Result lists are created only for the fields of the first batch; schema changes between
 * batches are not handled (see the TODOs below). Values that are instances of {@code Text} are
 * stored as their {@code toString()} form; all other values, including {@code null}, are stored
 * unchanged.
 *
 * @param batches the batches to read values from
 * @param expectedSchema the schema every batch is expected to have, or {@code null} to skip
 *                       the schema check
 * @return a map from column expression (as produced by {@code SchemaPath#toExpr()}) to the list
 *         of values read for that column, in read order
 * @throws SchemaChangeException if {@code expectedSchema} is non-null and a batch's schema is
 *                               not equal to it
 * @throws UnsupportedEncodingException declared in the signature; no statement visible in this
 *                                      method throws it directly
 */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    Map<String, List<Object>> combinedVectors = new TreeMap<>();
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
        if (expectedSchema != null && !expectedSchema.equals(loader.getSchema())) {
            throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s.  Expected schema : %s", loader.getSchema(), expectedSchema));
        }
        // TODO - the explicit check above is the only place in this method that visibly throws
        // SchemaChangeException; review whether the throws clause can be cleaned up.
        if (schema == null) {
            // First batch: create one (empty) result list per field.
            schema = loader.getSchema();
            for (MaterializedField mf : schema) {
                combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
            }
        } else {
            // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
            // of the current batch, the check for a null schema is used to only mutate the schema once
            // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
            schema = loader.getSchema();
        }
        logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
        totalRecords += loader.getRecordCount();
        for (VectorWrapper<?> w : loader) {
            String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
            ValueVector[] vectors;
            if (w.isHyper()) {
                vectors = w.getValueVectors();
            } else {
                vectors = new ValueVector[] { w.getValueVector() };
            }
            SelectionVector2 sv2 = null;
            SelectionVector4 sv4 = null;
            switch(schema.getSelectionVectorMode()) {
                case TWO_BYTE:
                    sv2 = loader.getSelectionVector2();
                    break;
                case FOUR_BYTE:
                    sv4 = loader.getSelectionVector4();
                    break;
                default:
                    // Any other mode: read the vectors directly, without a selection vector.
                    break;
            }
            if (sv4 != null) {
                for (int j = 0; j < sv4.getCount(); j++) {
                    int complexIndex = sv4.get(j);
                    // The upper 16 bits hold the batch index and the lower 16 bits the record index.
                    // Use the unsigned shift so a batch index with the top bit set does not become
                    // negative through sign extension.
                    int batchIndex = complexIndex >>> 16;
                    int recordIndexInBatch = complexIndex & 0xFFFF;
                    Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
                    combinedVectors.get(field).add(textToPlainString(obj));
                }
            } else {
                for (ValueVector vv : vectors) {
                    for (int j = 0; j < loader.getRecordCount(); j++) {
                        // With a two-byte selection vector, j is a position in the selection
                        // vector rather than in the value vector.
                        int index = (sv2 != null) ? sv2.getIndex(j) : j;
                        Object obj = vv.getAccessor().getObject(index);
                        combinedVectors.get(field).add(textToPlainString(obj));
                    }
                }
            }
        }
    }
    return combinedVectors;
}

/**
 * Returns {@code value.toString()} when {@code value} is a {@code Text}; any other value,
 * including {@code null}, is returned unchanged.
 */
private static Object textToPlainString(Object value) {
    // instanceof is false for null, so no separate null check is needed.
    return (value instanceof Text) ? value.toString() : value;
}
Also used : VectorAccessible(org.apache.drill.exec.record.VectorAccessible) MaterializedField(org.apache.drill.exec.record.MaterializedField) Text(org.apache.drill.exec.util.Text) TreeMap(java.util.TreeMap) ValueVector(org.apache.drill.exec.vector.ValueVector) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SelectionVector2(org.apache.drill.exec.record.selection.SelectionVector2) ArrayList(java.util.ArrayList) List(java.util.List) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Example 24 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class DrillTestWrapper method compareSchemaOnly.

/**
 * Runs the configured query and compares only the schema of the first returned batch against
 * the expected schema supplied by {@code testBuilder}.
 *
 * <p>The comparison checks the column count first and then, column by column, the schema path
 * and the major type. Every batch returned by the query is released afterwards, not just the
 * first one that is inspected, and the loader is always cleared.
 *
 * @throws Exception if the column counts differ, if any column's path or type differs from the
 *                   expected one, or if running the query or loading the batch fails
 */
protected void compareSchemaOnly() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
    List<QueryDataBatch> actual = null;
    try {
        test(testOptionSettingQueries);
        actual = testRunAndReturn(queryType, query);
        final QueryDataBatch batch = actual.get(0);
        loader.load(batch.getHeader().getDef(), batch.getData());
        final BatchSchema schema = loader.getSchema();
        final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = testBuilder.getExpectedSchema();
        if (schema.getFieldCount() != expectedSchema.size()) {
            throw new Exception("Expected and actual numbers of columns do not match.");
        }
        for (int i = 0; i < schema.getFieldCount(); ++i) {
            final MaterializedField actualColumn = schema.getColumn(i);
            final Pair<SchemaPath, TypeProtos.MajorType> expectedColumn = expectedSchema.get(i);
            final String actualSchemaPath = actualColumn.getPath();
            final TypeProtos.MajorType actualMajorType = actualColumn.getType();
            final String expectedSchemaPath = expectedColumn.getLeft().getAsUnescapedPath();
            final TypeProtos.MajorType expectedMajorType = expectedColumn.getValue();
            if (!actualSchemaPath.equals(expectedSchemaPath) || !actualMajorType.equals(expectedMajorType)) {
                throw new Exception(String.format("Schema path or type mismatch for column #%d:\n" + "Expected schema path: %s\nActual   schema path: %s\nExpected type: %s\nActual   type: %s", i, expectedSchemaPath, actualSchemaPath, Types.toString(expectedMajorType), Types.toString(actualMajorType)));
            }
        }
    } finally {
        try {
            // Release every batch the query returned; releasing only the inspected first batch
            // would leave the remaining batches' buffers unreleased.
            if (actual != null) {
                for (QueryDataBatch returned : actual) {
                    if (returned != null) {
                        returned.release();
                    }
                }
            }
        } finally {
            // Clear the loader even if releasing a batch fails.
            loader.clear();
        }
    }
}
Also used : QueryDataBatch(org.apache.drill.exec.rpc.user.QueryDataBatch) BatchSchema(org.apache.drill.exec.record.BatchSchema) RecordBatchLoader(org.apache.drill.exec.record.RecordBatchLoader) TypeProtos(org.apache.drill.common.types.TypeProtos) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Pair(org.apache.commons.lang3.tuple.Pair)

Example 25 with BatchSchema

use of org.apache.drill.exec.record.BatchSchema in project drill by apache.

the class PriorityQueueTemplate method resetQueue.

/**
 * Rebuilds this queue's hyper batch and heap selection vector from the given container and
 * selection vector.
 *
 * <p>The value vectors of every field in {@code container}'s schema are added to a fresh
 * {@link VectorContainer}, which is then wrapped in a new {@link ExpandableHyperContainer}.
 * A new {@link SelectionVector4} is allocated and filled with the entries of {@code v4};
 * {@code v4} is cleared afterwards and {@code doSetup} is invoked against the new hyper batch.
 *
 * @param container source of the value vectors; asserted to use the FOUR_BYTE selection
 *                  vector mode
 * @param v4 selection vector whose entries are copied into the new heap selection vector; it is
 *           cleared by this method
 * @throws SchemaChangeException declared by the signature; presumably propagated from
 *                               {@code doSetup} - confirm against its declaration
 */
@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
    assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
    BatchSchema schema = container.getSchema();
    // Collect the value vectors of every field of the incoming container into a new container.
    VectorContainer newContainer = new VectorContainer();
    for (MaterializedField field : schema) {
        int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
        newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
    }
    newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
    // Cleanup before recreating hyperbatch and sv4.
    cleanup();
    hyperBatch = new ExpandableHyperContainer(newContainer);
    // Batch count is taken from the number of value vectors held by the first wrapper.
    batchCount = hyperBatch.iterator().next().getValueVectors().length;
    // NOTE(review): 4 * (limit + 1) looks like 4 bytes per entry for limit + 1 entries - confirm.
    @SuppressWarnings("resource") final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
    heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
    // Reset queue size (most likely to be set to limit).
    queueSize = 0;
    // Copy every entry of the incoming selection vector, counting them as the new queue size.
    for (int i = 0; i < v4.getTotalCount(); i++) {
        heapSv4.set(i, v4.get(i));
        ++queueSize;
    }
    // The entries now live in heapSv4, so the incoming vector is cleared.
    v4.clear();
    doSetup(context, hyperBatch, null);
}
Also used : ExpandableHyperContainer(org.apache.drill.exec.record.ExpandableHyperContainer) BatchSchema(org.apache.drill.exec.record.BatchSchema) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorContainer(org.apache.drill.exec.record.VectorContainer) DrillBuf(io.netty.buffer.DrillBuf) SelectionVector4(org.apache.drill.exec.record.selection.SelectionVector4)

Aggregations

BatchSchema (org.apache.drill.exec.record.BatchSchema)39 SchemaBuilder (org.apache.drill.test.rowSet.SchemaBuilder)26 Test (org.junit.Test)20 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)18 BatchValidator (org.apache.drill.exec.physical.impl.validate.BatchValidator)10 RowSetReader (org.apache.drill.test.rowSet.RowSet.RowSetReader)8 MaterializedField (org.apache.drill.exec.record.MaterializedField)7 ValueVector (org.apache.drill.exec.vector.ValueVector)6 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)4 RecordBatch (org.apache.drill.exec.record.RecordBatch)4 VectorAccessible (org.apache.drill.exec.record.VectorAccessible)4 VectorContainer (org.apache.drill.exec.record.VectorContainer)4 ArrayList (java.util.ArrayList)3 SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4)3 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)3 DrillBuf (io.netty.buffer.DrillBuf)2 IOException (java.io.IOException)2 UserException (org.apache.drill.common.exceptions.UserException)2 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)2 MinorFragmentEndpoint (org.apache.drill.exec.physical.MinorFragmentEndpoint)2