Use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
The class HashAggBatch, method createAggregatorInternal:
  private HashAggregator createAggregatorInternal()
      throws SchemaChangeException, ClassTransformationException, IOException {
    CodeGenerator<HashAggregator> top = CodeGenerator.get(
        HashAggregator.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    ClassGenerator<HashAggregator> cg = top.getRoot();
    ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
    top.plainJavaCapable(true);
    // Uncomment this line to debug the generated code.
    // top.saveCodeForDebugging(true);
    container.clear();

    int numGroupByExprs = (popConfig.getGroupByExprs() != null) ? popConfig.getGroupByExprs().size() : 0;
    int numAggrExprs = (popConfig.getAggrExprs() != null) ? popConfig.getAggrExprs().size() : 0;
    aggrExprs = new LogicalExpression[numAggrExprs];
    groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
    aggrOutFieldIds = new TypedFieldId[numAggrExprs];
    ErrorCollector collector = new ErrorCollectorImpl();

    for (int i = 0; i < numGroupByExprs; i++) {
      NamedExpression ne = popConfig.getGroupByExprs().get(i);
      final LogicalExpression expr = ExpressionTreeMaterializer.materialize(
          ne.getExpr(), incoming, collector, context.getFunctionRegistry());
      if (expr == null) {
        continue;
      }
      final MaterializedField outputField =
          MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
      @SuppressWarnings("resource")
      ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      // Add this group-by vector to the output container.
      groupByOutFieldIds[i] = container.add(vv);
    }

    for (int i = 0; i < numAggrExprs; i++) {
      NamedExpression ne = popConfig.getAggrExprs().get(i);
      final LogicalExpression expr = ExpressionTreeMaterializer.materialize(
          ne.getExpr(), incoming, collector, context.getFunctionRegistry());
      if (expr instanceof IfExpression) {
        throw UserException.unsupportedError(new UnsupportedOperationException(
            "Union type not supported in aggregate functions")).build(logger);
      }
      if (collector.hasErrors()) {
        throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString());
      }
      if (expr == null) {
        continue;
      }
      final MaterializedField outputField =
          MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
      @SuppressWarnings("resource")
      ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      aggrOutFieldIds[i] = container.add(vv);
      aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
    }

    setupUpdateAggrValues(cgInner);
    setupGetIndex(cg);
    cg.getBlock("resetValues")._return(JExpr.TRUE);

    container.buildSchema(SelectionVectorMode.NONE);
    HashAggregator agg = context.getImplementationClass(top);

    // TODO - fix the validator on this option
    HashTableConfig htConfig = new HashTableConfig(
        (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
        HashTable.DEFAULT_LOAD_FACTOR, popConfig.getGroupByExprs(), null /* no probe exprs */, comparators);

    agg.setup(popConfig, htConfig, context, this.stats, oContext.getAllocator(), incoming, this,
        aggrExprs, cgInner.getWorkspaceTypes(), groupByOutFieldIds, this.container);
    return agg;
  }
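The recurring ValueVector pattern here — derive a MaterializedField from a materialized expression's type, ask TypeHelper for a matching vector, and register it with the output container to obtain a TypedFieldId — can be exercised on its own. Below is a minimal sketch of that pattern; the RootAllocatorFactory setup and the column name "key" are illustrative assumptions, not taken from the operator above.

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocatorFactory;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.vector.ValueVector;

public class VectorSetupSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(DrillConfig.create())) {
      // Describe the output column, as createAggregatorInternal does per expression.
      MaterializedField field = MaterializedField.create("key", Types.required(MinorType.INT));
      // TypeHelper selects the concrete vector class (an IntVector here) for the field's type.
      ValueVector vv = TypeHelper.getNewVector(field, allocator);
      VectorContainer container = new VectorContainer();
      // add() returns the TypedFieldId the operator stores in groupByOutFieldIds.
      TypedFieldId id = container.add(vv);
      System.out.println("added column with field id " + id);
      container.clear(); // releases the vector's buffers
    }
  }
}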
Use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
The class HashAggTemplate, method allocateOutgoing:
  private void allocateOutgoing(int records) {
    // Skip the keys and only allocate for outputting the workspace values
    // (keys will be output through splitAndTransfer).
    Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
    for (int i = 0; i < numGroupByOutFields; i++) {
      outgoingIter.next();
    }
    while (outgoingIter.hasNext()) {
      @SuppressWarnings("resource")
      ValueVector vv = outgoingIter.next().getValueVector();
      /*
       * In buildSchema we use the allocation model that specifies the exact record count,
       * so we need to stick with that allocation model until DRILL-2211 is resolved. Using
       * 50 as the average bytes per value, as in HashTable.
       */
      AllocationHelper.allocatePrecomputedChildCount(vv, records, VARIABLE_WIDTH_VALUE_SIZE, 0);
    }
  }
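The allocation call itself is easy to reproduce in isolation. A hedged sketch, assuming a standalone VARCHAR column: the record count of 4096, the 50-byte average width (the value of VARIABLE_WIDTH_VALUE_SIZE above), and the field name are illustrative.

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocatorFactory;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.ValueVector;

public class AllocateOutgoingSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(DrillConfig.create())) {
      ValueVector vv = TypeHelper.getNewVector(
          MaterializedField.create("total", Types.required(MinorType.VARCHAR)), allocator);
      // Reserve space for 4096 values at an assumed average of 50 bytes each;
      // the last argument (0) means no precomputed child values.
      AllocationHelper.allocatePrecomputedChildCount(vv, 4096, 50, 0);
      System.out.println("capacity after allocation: " + vv.getValueCapacity());
      vv.clear(); // releases the buffers
    }
  }
}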
Use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
The class TopNBatch, method purge:
  private void purge() throws SchemaChangeException {
    Stopwatch watch = Stopwatch.createStarted();
    VectorContainer c = priorityQueue.getHyperBatch();
    VectorContainer newContainer = new VectorContainer(oContext);
    @SuppressWarnings("resource")
    SelectionVector4 selectionVector4 = priorityQueue.getHeapSv4();
    SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, context);
    SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, context);
    if (copier == null) {
      copier = RemovingRecordBatch.getGenerated4Copier(batch, context, oContext.getAllocator(), newContainer, newBatch, null);
    } else {
      // Reuse the existing copier: rebuild the output vectors to match the hyper-batch schema.
      for (VectorWrapper<?> i : batch) {
        @SuppressWarnings("resource")
        ValueVector v = TypeHelper.getNewVector(i.getField(), oContext.getAllocator());
        newContainer.add(v);
      }
      copier.setupRemover(context, batch, newBatch);
    }
    @SuppressWarnings("resource")
    SortRecordBatchBuilder builder = new SortRecordBatchBuilder(oContext.getAllocator());
    try {
      do {
        // Copy the records currently addressed by the SV4, then finalize the
        // container so it can be added to the builder as one batch.
        int count = selectionVector4.getCount();
        int copiedRecords = copier.copyRecords(0, count);
        assert copiedRecords == count;
        for (VectorWrapper<?> v : newContainer) {
          ValueVector.Mutator m = v.getValueVector().getMutator();
          m.setValueCount(count);
        }
        newContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        newContainer.setRecordCount(count);
        builder.add(newBatch);
      } while (selectionVector4.next());
      selectionVector4.clear();
      c.clear();
      VectorContainer newQueue = new VectorContainer();
      builder.canonicalize();
      builder.build(context, newQueue);
      priorityQueue.resetQueue(newQueue, builder.getSv4().createNewWrapperCurrent());
      builder.getSv4().clear();
      selectionVector4.clear();
    } finally {
      DrillAutoCloseables.closeNoChecked(builder);
    }
    logger.debug("Took {} us to purge", watch.elapsed(TimeUnit.MICROSECONDS));
  }
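The tail of the copy loop shows the general recipe for publishing a container to downstream readers: set each vector's value count through its mutator, build the schema, and set the record count. A minimal sketch of that sequence on a hypothetical one-column container (the field name, values, and allocator setup are illustrative assumptions):

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocatorFactory;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.IntVector;

public class FinalizeBatchSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(DrillConfig.create())) {
      VectorContainer container = new VectorContainer();
      IntVector ints = new IntVector(
          MaterializedField.create("a", Types.required(MinorType.INT)), allocator);
      container.add(ints);
      ints.allocateNew(3);
      for (int i = 0; i < 3; i++) {
        ints.getMutator().setSafe(i, i * 10);
      }
      // Mirror the tail of purge(): set every vector's value count, then stamp
      // the container with a schema and a record count.
      for (VectorWrapper<?> w : container) {
        w.getValueVector().getMutator().setValueCount(3);
      }
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      container.setRecordCount(3);
      System.out.println("records: " + container.getRecordCount()); // 3
      container.clear();
    }
  }
}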
Use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
The class StreamingAggBatch, method constructSpecialBatch:
  /**
   * Invoked when we have a straight aggregate (no group-by expression) and the input is empty.
   * In this case we construct an outgoing batch with a record count of 1. For the nullable
   * vectors we don't set anything, as we want the output to be NULL. For the required vectors
   * (only for count()) we set the value to zero, since buffers are not zeroed out when
   * initially allocated.
   */
  @SuppressWarnings("resource")
  private void constructSpecialBatch() {
    int exprIndex = 0;
    for (final VectorWrapper<?> vw : container) {
      final ValueVector vv = vw.getValueVector();
      AllocationHelper.allocateNew(vv, SPECIAL_BATCH_COUNT);
      vv.getMutator().setValueCount(SPECIAL_BATCH_COUNT);
      if (vv.getField().getType().getMode() == TypeProtos.DataMode.REQUIRED) {
        if (vv instanceof FixedWidthVector) {
          /*
           * The only case where we should have a required vector in the aggregate is the count
           * function, whose output is always a FixedWidthVector (BigIntVector). Zero out the vector.
           */
          ((FixedWidthVector) vv).zeroVector();
        } else {
          /*
           * If we reach this else block we have a required vector of variable width. We should
           * not be here: raise an error, since we have set the record count to 1 without
           * clearing the buffer.
           */
          throw new DrillRuntimeException("FixedWidth vectors is the expected output vector type. "
              + "Corresponding expression: " + popConfig.getExprs().get(exprIndex).toString());
        }
      }
      exprIndex++;
    }
    container.setRecordCount(SPECIAL_BATCH_COUNT);
    recordCount = SPECIAL_BATCH_COUNT;
  }
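The zeroVector() call is the crux: fixed-width Drill buffers are not zero-filled on allocation, so a COUNT() output over empty input must be explicitly zeroed before its single record is exposed. A small sketch of this behavior on a standalone BigIntVector, where the allocator setup and field name are assumptions for illustration:

import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocatorFactory;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.BigIntVector;

public class SpecialBatchSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(DrillConfig.create())) {
      BigIntVector counts = new BigIntVector(
          MaterializedField.create("cnt", Types.required(MinorType.BIGINT)), allocator);
      counts.allocateNew(1);
      // Freshly allocated buffers hold garbage; zeroVector() makes the single
      // record read back as 0, matching COUNT() over empty input.
      counts.zeroVector();
      counts.getMutator().setValueCount(1);
      System.out.println(counts.getAccessor().get(0)); // prints 0
      counts.close();
    }
  }
}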
Use of org.apache.drill.exec.vector.ValueVector in project drill by apache.
The class FlattenRecordBatch, method setValueCount:
  private void setValueCount(int count) {
    for (ValueVector v : allocationVectors) {
      ValueVector.Mutator m = v.getMutator();
      m.setValueCount(count);
    }
    if (complexWriters == null) {
      return;
    }
    for (ComplexWriter writer : complexWriters) {
      writer.setValueCount(count);
    }
  }
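Setting the value count is a step distinct from writing: mutator writes only fill buffers, and each vector must then be told how many leading values are valid before readers consume it. A minimal sketch mirroring the loop over allocationVectors, using two hypothetical vectors:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocatorFactory;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.IntVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VarCharVector;

public class SetValueCountSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = RootAllocatorFactory.newRoot(DrillConfig.create())) {
      IntVector ids = new IntVector(
          MaterializedField.create("id", Types.required(MinorType.INT)), allocator);
      VarCharVector names = new VarCharVector(
          MaterializedField.create("name", Types.required(MinorType.VARCHAR)), allocator);
      ids.allocateNew(2);
      names.allocateNew();
      ids.getMutator().setSafe(0, 1);
      ids.getMutator().setSafe(1, 2);
      names.getMutator().setSafe(0, "a".getBytes(StandardCharsets.UTF_8));
      names.getMutator().setSafe(1, "b".getBytes(StandardCharsets.UTF_8));
      // The writes above only fill buffers; each vector must also be told how
      // many values are valid, as FlattenRecordBatch.setValueCount does per batch.
      List<ValueVector> allocationVectors = Arrays.<ValueVector>asList(ids, names);
      for (ValueVector v : allocationVectors) {
        v.getMutator().setValueCount(2);
      }
      System.out.println(new String(names.getAccessor().get(1), StandardCharsets.UTF_8)); // b
      ids.close();
      names.close();
    }
  }
}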