use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class SortRecordBatchBuilder, method getHeldRecordBatches.
public List<VectorContainer> getHeldRecordBatches() {
  ArrayList<VectorContainer> containerList = Lists.newArrayList();
  for (BatchSchema bs : batches.keySet()) {
    for (RecordBatchData bd : batches.get(bs)) {
      VectorContainer c = bd.getContainer();
      c.setRecordCount(bd.getRecordCount());
      containerList.add(c);
    }
  }
  // Ownership of the containers transfers to the caller; drop our references.
  batches.clear();
  return containerList;
}
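A minimal caller sketch (hypothetical; builder names a SortRecordBatchBuilder instance, and process(...) is a stand-in for spilling or downstream handoff) showing that once the batches are handed over, the caller is responsible for releasing them:

// Hypothetical usage: take ownership of the held batches, process them,
// then release the underlying buffers via VectorContainer.clear().
for (VectorContainer c : builder.getHeldRecordBatches()) {
  process(c);   // stand-in for spilling or handing the container downstream
  c.clear();    // the caller now owns the vectors and must free them
}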
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class MergingRecordBatch, method isSameSchemaAmongBatches.
private boolean isSameSchemaAmongBatches(final RecordBatchLoader[] batchLoaders) {
  Preconditions.checkArgument(batchLoaders.length > 0, "At least one batch is required!");
  // Compare every batch's schema against the first one.
  final BatchSchema schema = batchLoaders[0].getSchema();
  for (int i = 1; i < batchLoaders.length; i++) {
    if (!schema.equals(batchLoaders[i].getSchema())) {
      logger.error("Schemas are different. Schema 1: " + schema + ", Schema 2: " + batchLoaders[i].getSchema());
      return false;
    }
  }
  return true;
}
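A hedged usage sketch: a merge receiver can use this check to fail fast before attempting to merge streams with mismatched schemas. The guard below is illustrative, not Drill's actual call site (it assumes a SchemaChangeException(String) constructor, which Drill provides):

// Illustrative guard around the check above.
if (!isSameSchemaAmongBatches(batchLoaders)) {
  throw new SchemaChangeException("Incoming batches must share an identical BatchSchema to be merged");
}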
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class DrillTestWrapper, method addToCombinedVectorResults.
/**
 * Add to result vectors and compare each batch's schema against the expected schema while
 * iterating batches.
 * @param batches the batches to read
 * @param expectedSchema the expected schema of the batches; a SchemaChangeException is thrown
 *                       if a batch with a different schema is encountered
 * @return a map from column name to the values of that column accumulated across all batches
 * @throws SchemaChangeException
 * @throws UnsupportedEncodingException
 */
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
  // TODO - this does not handle schema changes
  Map<String, List<Object>> combinedVectors = new TreeMap<>();
  long totalRecords = 0;
  BatchSchema schema = null;
  for (VectorAccessible loader : batches) {
    if (expectedSchema != null) {
      if (!expectedSchema.equals(loader.getSchema())) {
        throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s. Expected schema: %s", loader.getSchema(), expectedSchema));
      }
    }
    // Note: nothing below throws SchemaChangeException anymore, so the throws clause above could be cleaned up.
    if (schema == null) {
      schema = loader.getSchema();
      for (MaterializedField mf : schema) {
        combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
      }
    } else {
      // TODO - actually handle schema changes; this is just to get access to the SelectionVectorMode
      // of the current batch. The check for a null schema is used to only mutate the schema once.
      // Need to add new vectors and null-fill for previous batches? Is the distinction between null
      // and non-existence important?
      schema = loader.getSchema();
    }
    logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
    totalRecords += loader.getRecordCount();
    for (VectorWrapper<?> w : loader) {
      String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
      ValueVector[] vectors;
      if (w.isHyper()) {
        vectors = w.getValueVectors();
      } else {
        vectors = new ValueVector[] { w.getValueVector() };
      }
      SelectionVector2 sv2 = null;
      SelectionVector4 sv4 = null;
      switch (schema.getSelectionVectorMode()) {
        case TWO_BYTE:
          sv2 = loader.getSelectionVector2();
          break;
        case FOUR_BYTE:
          sv4 = loader.getSelectionVector4();
          break;
      }
      if (sv4 != null) {
        for (int j = 0; j < sv4.getCount(); j++) {
          // An SV4 entry packs the batch index into the upper 16 bits and the
          // record index within that batch into the lower 16 bits.
          int complexIndex = sv4.get(j);
          int batchIndex = complexIndex >> 16;
          int recordIndexInBatch = complexIndex & 65535;
          Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
          if (obj instanceof Text) {
            obj = obj.toString();
          }
          combinedVectors.get(field).add(obj);
        }
      } else {
        for (ValueVector vv : vectors) {
          for (int j = 0; j < loader.getRecordCount(); j++) {
            int index = (sv2 != null) ? sv2.getIndex(j) : j;
            Object obj = vv.getAccessor().getObject(index);
            if (obj instanceof Text) {
              obj = obj.toString();
            }
            combinedVectors.get(field).add(obj);
          }
        }
      }
    }
  }
  return combinedVectors;
}
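The bit arithmetic above is the heart of FOUR_BYTE selection vectors: each 4-byte entry addresses a record anywhere in a hyper-batch. A standalone illustration of that layout (hypothetical helper names, not Drill API):

// Hypothetical helpers mirroring the compound-index decoding done above:
// upper 16 bits = batch index, lower 16 bits = record index within that batch.
static int packSv4(int batchIndex, int recordIndex) {
  return (batchIndex << 16) | (recordIndex & 0xFFFF);
}
static int batchOf(int compound)  { return compound >>> 16; }
static int recordOf(int compound) { return compound & 0xFFFF; }
// e.g. packSv4(3, 42) == 0x3002A; batchOf(0x3002A) == 3; recordOf(0x3002A) == 42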
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class DrillTestWrapper, method compareSchemaOnly.
protected void compareSchemaOnly() throws Exception {
  RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
  List<QueryDataBatch> actual;
  QueryDataBatch batch = null;
  try {
    test(testOptionSettingQueries);
    actual = testRunAndReturn(queryType, query);
    // Only the first batch's schema is compared against the expected schema.
    batch = actual.get(0);
    loader.load(batch.getHeader().getDef(), batch.getData());
    final BatchSchema schema = loader.getSchema();
    final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = testBuilder.getExpectedSchema();
    if (schema.getFieldCount() != expectedSchema.size()) {
      throw new Exception("Expected and actual numbers of columns do not match.");
    }
    // Columns must match in both name (schema path) and major type, in order.
    for (int i = 0; i < schema.getFieldCount(); ++i) {
      final String actualSchemaPath = schema.getColumn(i).getPath();
      final TypeProtos.MajorType actualMajorType = schema.getColumn(i).getType();
      final String expectedSchemaPath = expectedSchema.get(i).getLeft().getAsUnescapedPath();
      final TypeProtos.MajorType expectedMajorType = expectedSchema.get(i).getValue();
      if (!actualSchemaPath.equals(expectedSchemaPath) || !actualMajorType.equals(expectedMajorType)) {
        throw new Exception(String.format("Schema path or type mismatch for column #%d:\n" + "Expected schema path: %s\nActual schema path: %s\nExpected type: %s\nActual type: %s", i, expectedSchemaPath, actualSchemaPath, Types.toString(expectedMajorType), Types.toString(actualMajorType)));
      }
    }
  } finally {
    if (batch != null) {
      batch.release();
    }
    loader.clear();
  }
}
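For context, a test might supply the expected schema that this method verifies roughly as follows. This is a hedged sketch: schemaBaseLine(...) and the query are assumed from Drill's test framework, and the column and type are illustrative.

// Hedged sketch of declaring an expected schema for a schema-only comparison.
List<Pair<SchemaPath, TypeProtos.MajorType>> expected = Lists.newArrayList();
expected.add(Pair.of(SchemaPath.getSimplePath("employee_id"), Types.optional(TypeProtos.MinorType.BIGINT)));
testBuilder()
    .sqlQuery("SELECT employee_id FROM cp.`employee.json` LIMIT 1")
    .schemaBaseLine(expected)
    .build()
    .run();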
use of org.apache.drill.exec.record.BatchSchema in project drill by apache.
the class PriorityQueueTemplate, method resetQueue.
@Override
public void resetQueue(VectorContainer container, SelectionVector4 v4) throws SchemaChangeException {
  assert container.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.FOUR_BYTE;
  BatchSchema schema = container.getSchema();
  VectorContainer newContainer = new VectorContainer();
  // Rebuild a container with the same fields so it can be wrapped in a hyper-batch.
  for (MaterializedField field : schema) {
    int[] ids = container.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds();
    newContainer.add(container.getValueAccessorById(field.getValueClass(), ids).getValueVectors());
  }
  newContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  // Cleanup before recreating the hyper-batch and sv4.
  cleanup();
  hyperBatch = new ExpandableHyperContainer(newContainer);
  batchCount = hyperBatch.iterator().next().getValueVectors().length;
  @SuppressWarnings("resource")
  final DrillBuf drillBuf = allocator.buffer(4 * (limit + 1));
  heapSv4 = new SelectionVector4(drillBuf, limit, Character.MAX_VALUE);
  // Reset the queue size (it will most likely end up equal to limit), then
  // repopulate the heap from the incoming sv4.
  queueSize = 0;
  for (int i = 0; i < v4.getTotalCount(); i++) {
    heapSv4.set(i, v4.get(i));
    ++queueSize;
  }
  v4.clear();
  doSetup(context, hyperBatch, null);
}
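The buffer size above follows from the SV4 layout: one 4-byte compound index per queue slot, plus one spare slot (presumably so the heap can momentarily hold limit + 1 entries before the worst one is evicted). As plain arithmetic:

// Illustrative arithmetic only, not Drill API.
int limit = 1000;                  // max rows the TopN queue keeps
int bytesNeeded = 4 * (limit + 1); // 4 bytes per SV4 entry, one spare slot -> 4004 bytes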