use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
the class MockRecordReader method next.
@Override
public int next() {
if (recordsRead >= this.config.getRecords()) {
return 0;
}
final int recordSetSize = Math.min(batchRecordCount, this.config.getRecords() - recordsRead);
recordsRead += recordSetSize;
for (final ValueVector v : valueVectors) {
final ValueVector.Mutator m = v.getMutator();
m.generateTestData(recordSetSize);
}
return recordSetSize;
}
use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
the class DrillTestWrapper method addToCombinedVectorResults.
/**
* Add to result vectors and compare batch schema against expected schema while iterating batches.
* @param batches
* @param expectedSchema: the expected schema the batches should contain. Through SchemaChangeException
* if encounter different batch schema.
* @return
* @throws SchemaChangeException
* @throws UnsupportedEncodingException
*/
public static Map<String, List<Object>> addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema) throws SchemaChangeException, UnsupportedEncodingException {
// TODO - this does not handle schema changes
Map<String, List<Object>> combinedVectors = new TreeMap<>();
long totalRecords = 0;
BatchSchema schema = null;
for (VectorAccessible loader : batches) {
if (expectedSchema != null) {
if (!expectedSchema.equals(loader.getSchema())) {
throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" + "Actual schema: %s. Expected schema : %s", loader.getSchema(), expectedSchema));
}
}
// SchemaChangeException, so check/clean throws clause above.
if (schema == null) {
schema = loader.getSchema();
for (MaterializedField mf : schema) {
combinedVectors.put(SchemaPath.getSimplePath(mf.getPath()).toExpr(), new ArrayList<Object>());
}
} else {
// TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
// of the current batch, the check for a null schema is used to only mutate the schema once
// need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
schema = loader.getSchema();
}
logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
totalRecords += loader.getRecordCount();
for (VectorWrapper<?> w : loader) {
String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
ValueVector[] vectors;
if (w.isHyper()) {
vectors = w.getValueVectors();
} else {
vectors = new ValueVector[] { w.getValueVector() };
}
SelectionVector2 sv2 = null;
SelectionVector4 sv4 = null;
switch(schema.getSelectionVectorMode()) {
case TWO_BYTE:
sv2 = loader.getSelectionVector2();
break;
case FOUR_BYTE:
sv4 = loader.getSelectionVector4();
break;
}
if (sv4 != null) {
for (int j = 0; j < sv4.getCount(); j++) {
int complexIndex = sv4.get(j);
int batchIndex = complexIndex >> 16;
int recordIndexInBatch = complexIndex & 65535;
Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
if (obj != null) {
if (obj instanceof Text) {
obj = obj.toString();
}
}
combinedVectors.get(field).add(obj);
}
} else {
for (ValueVector vv : vectors) {
for (int j = 0; j < loader.getRecordCount(); j++) {
int index;
if (sv2 != null) {
index = sv2.getIndex(j);
} else {
index = j;
}
Object obj = vv.getAccessor().getObject(index);
if (obj != null) {
if (obj instanceof Text) {
obj = obj.toString();
}
}
combinedVectors.get(field).add(obj);
}
}
}
}
}
return combinedVectors;
}
use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
the class DrillTestWrapper method addToHyperVectorMap.
private Map<String, HyperVectorValueIterator> addToHyperVectorMap(final List<QueryDataBatch> records, final RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException {
// TODO - this does not handle schema changes
Map<String, HyperVectorValueIterator> combinedVectors = new TreeMap<>();
long totalRecords = 0;
QueryDataBatch batch;
int size = records.size();
for (int i = 0; i < size; i++) {
batch = records.get(i);
loader.load(batch.getHeader().getDef(), batch.getData());
logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
totalRecords += loader.getRecordCount();
for (VectorWrapper<?> w : loader) {
String field = SchemaPath.getSimplePath(w.getField().getPath()).toExpr();
if (!combinedVectors.containsKey(field)) {
MaterializedField mf = w.getField();
ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1);
vvList[0] = w.getValueVector();
combinedVectors.put(field, new HyperVectorValueIterator(mf, new HyperVectorWrapper<>(mf, vvList)));
} else {
combinedVectors.get(field).getHyperVector().addVector(w.getValueVector());
}
}
}
for (HyperVectorValueIterator hvi : combinedVectors.values()) {
hvi.determineTotalSize();
}
return combinedVectors;
}
use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
the class MergingRecordBatch method buildSchema.
@Override
public void buildSchema() throws SchemaChangeException {
// find frag provider that has data to use to build schema, and put in tempBatchHolder for later use
tempBatchHolder = new RawFragmentBatch[fragProviders.length];
int i = 0;
try {
while (true) {
if (i >= fragProviders.length) {
state = BatchState.DONE;
return;
}
final RawFragmentBatch batch = getNext(i);
if (batch == null) {
if (!context.shouldContinue()) {
state = BatchState.STOP;
} else {
state = BatchState.DONE;
}
break;
}
if (batch.getHeader().getDef().getFieldCount() == 0) {
i++;
continue;
}
tempBatchHolder[i] = batch;
for (final SerializedField field : batch.getHeader().getDef().getFieldList()) {
@SuppressWarnings("resource") final ValueVector v = outgoingContainer.addOrGet(MaterializedField.create(field));
v.allocateNew();
}
break;
}
} catch (final IOException e) {
throw new DrillRuntimeException(e);
}
outgoingContainer = VectorContainer.canonicalize(outgoingContainer);
outgoingContainer.buildSchema(SelectionVectorMode.NONE);
}
use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
the class HashAggBatch method createAggregatorInternal.
private HashAggregator createAggregatorInternal() throws SchemaChangeException, ClassTransformationException, IOException {
CodeGenerator<HashAggregator> top = CodeGenerator.get(HashAggregator.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
ClassGenerator<HashAggregator> cg = top.getRoot();
ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
top.plainJavaCapable(true);
// Uncomment out this line to debug the generated code.
// top.saveCodeForDebugging(true);
container.clear();
int numGroupByExprs = (popConfig.getGroupByExprs() != null) ? popConfig.getGroupByExprs().size() : 0;
int numAggrExprs = (popConfig.getAggrExprs() != null) ? popConfig.getAggrExprs().size() : 0;
aggrExprs = new LogicalExpression[numAggrExprs];
groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
aggrOutFieldIds = new TypedFieldId[numAggrExprs];
ErrorCollector collector = new ErrorCollectorImpl();
int i;
for (i = 0; i < numGroupByExprs; i++) {
NamedExpression ne = popConfig.getGroupByExprs().get(i);
final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
if (expr == null) {
continue;
}
final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
@SuppressWarnings("resource") ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
// add this group-by vector to the output container
groupByOutFieldIds[i] = container.add(vv);
}
for (i = 0; i < numAggrExprs; i++) {
NamedExpression ne = popConfig.getAggrExprs().get(i);
final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
if (expr instanceof IfExpression) {
throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
}
if (collector.hasErrors()) {
throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString());
}
if (expr == null) {
continue;
}
final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
@SuppressWarnings("resource") ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
aggrOutFieldIds[i] = container.add(vv);
aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
}
setupUpdateAggrValues(cgInner);
setupGetIndex(cg);
cg.getBlock("resetValues")._return(JExpr.TRUE);
container.buildSchema(SelectionVectorMode.NONE);
HashAggregator agg = context.getImplementationClass(top);
HashTableConfig htConfig = // TODO - fix the validator on this option
new HashTableConfig((int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE), HashTable.DEFAULT_LOAD_FACTOR, popConfig.getGroupByExprs(), null, /* no probe exprs */
comparators);
agg.setup(popConfig, htConfig, context, this.stats, oContext.getAllocator(), incoming, this, aggrExprs, cgInner.getWorkspaceTypes(), groupByOutFieldIds, this.container);
return agg;
}
Aggregations