Use of org.apache.drill.exec.vector.UntypedNullVector in project drill by apache: the class StreamingAggBatch, method createAggregatorInternal.
protected StreamingAggregator createAggregatorInternal() {
  ClassGenerator<StreamingAggregator> cg = CodeGenerator.getRoot(StreamingAggTemplate.TEMPLATE_DEFINITION, context.getOptions());
  // Streaming agg is no longer plain-Java capable. Stats generates code
  // that fails when compiled normally:
  //   cannot override resetValues() in org.apache.drill.exec.physical.impl.aggregate.StreamingAggTemplate
  //   public boolean resetValues()
  //                  ^
  //   overridden method does not throw org.apache.drill.exec.exception.SchemaChangeException (compiler.err.override.meth.doesnt.throw)
  // cg.getCodeGenerator().plainJavaCapable(true);
  // Uncomment this line to debug the generated code:
  // cg.getCodeGenerator().saveCodeForDebugging(true);
  container.clear();
  LogicalExpression[] keyExprs = new LogicalExpression[getKeyExpressions().size()];
  LogicalExpression[] valueExprs = new LogicalExpression[getValueExpressions().size()];
  TypedFieldId[] keyOutputIds = new TypedFieldId[getKeyExpressions().size()];
  ErrorCollector collector = new ErrorCollectorImpl();
  for (int i = 0; i < keyExprs.length; i++) {
    NamedExpression ne = getKeyExpressions().get(i);
    LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    keyExprs[i] = expr;
    MaterializedField outputField = MaterializedField.create(ne.getRef().getLastSegment().getNameSegment().getPath(), expr.getMajorType());
    container.addOrGet(outputField);
    keyOutputIds[i] = container.getValueVectorId(ne.getRef());
  }
  for (int i = 0; i < valueExprs.length; i++) {
    NamedExpression ne = getValueExpressions().get(i);
    LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry(), true, false);
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
    }
    if (expr == null) {
      continue;
    }
    // Populate the complex writers for complex exprs.
    if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
      // Lazy initialization of the list of complex writers, if not done yet.
      if (complexWriters == null) {
        complexWriters = Lists.newArrayList();
      } else {
        complexWriters.clear();
      }
      // The reference name will be passed to ComplexWriter and used as the
      // name of the output vector from the writer.
      ((DrillFuncHolderExpr) expr).setFieldReference(ne.getRef());
      MaterializedField field = MaterializedField.create(ne.getRef().getAsNamePart().getName(), UntypedNullHolder.TYPE);
      container.add(new UntypedNullVector(field, container.getAllocator()));
      valueExprs[i] = expr;
    } else {
      MaterializedField outputField = MaterializedField.create(ne.getRef().getLastSegment().getNameSegment().getPath(), expr.getMajorType());
      container.addOrGet(outputField);
      TypedFieldId id = container.getValueVectorId(ne.getRef());
      valueExprs[i] = new ValueVectorWriteExpression(id, expr, true);
    }
  }
  collector.reportErrors(logger);
  setupIsSame(cg, keyExprs);
  setupIsSameApart(cg, keyExprs);
  addRecordValues(cg, valueExprs);
  outputRecordKeys(cg, keyOutputIds, keyExprs);
  outputRecordKeysPrev(cg, keyOutputIds, keyExprs);
  cg.getBlock("resetValues")._return(JExpr.TRUE);
  getIndex(cg);
  container.buildSchema(SelectionVectorMode.NONE);
  StreamingAggregator agg = context.getImplementationClass(cg);
  try {
    agg.setup(oContext, incoming, this, maxOutputRowCount);
  } catch (SchemaChangeException e) {
    throw schemaChangeException(e, logger);
  }
  allocateComplexWriters();
  return agg;
}
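The complex-writer branch above is the one place this operator creates an UntypedNullVector directly: the output type of a complex aggregate is unknown until the ComplexWriter runs, so the container gets a typed placeholder under the expression's reference name. Below is a minimal sketch of that registration step, pulled out into a hypothetical helper (the class and method names are illustrative; the calls mirror those in the method above).

import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.exec.expr.holders.UntypedNullHolder;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.vector.UntypedNullVector;

// Hypothetical helper class; not part of Drill.
public class UntypedNullPlaceholder {

  // Registers a placeholder vector for a complex-writer aggregate output,
  // mirroring the branch in createAggregatorInternal() above: the real data
  // is produced later by the ComplexWriter, so the container only needs a
  // slot of type UntypedNullHolder.TYPE under the expression's reference name.
  public static UntypedNullVector addPlaceholder(VectorContainer container, FieldReference ref) {
    MaterializedField field = MaterializedField.create(ref.getAsNamePart().getName(), UntypedNullHolder.TYPE);
    UntypedNullVector vector = new UntypedNullVector(field, container.getAllocator());
    container.add(vector);
    return vector;
  }
}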
Use of org.apache.drill.exec.vector.UntypedNullVector in project drill by apache: the class RecordBatchLoader, method load.
/**
* Load a record batch from a single buffer.
*
* @param def
* The definition for the record batch.
* @param buf
* The buffer that holds the data associated with the record batch.
* @return Whether the schema changed since the previous load.
*/
@SuppressWarnings("resource")
public boolean load(RecordBatchDef def, DrillBuf buf) {
  if (logger.isTraceEnabled()) {
    logger.trace("Loading record batch with def {} and data {}", def, buf);
    logger.trace("Load, ThreadID: {}\n{}", Thread.currentThread().getId(), new StackTrace());
  }
  container.zeroVectors();
  valueCount = def.getRecordCount();
  boolean schemaChanged = schema == null;
  // Load vectors from the batch buffer, while tracking added and/or removed
  // vectors (relative to the previous call) in order to determine whether
  // the schema has changed since the previous call.
  // Set up to recognize previous fields that no longer exist.
  Map<String, ValueVector> oldFields = CaseInsensitiveMap.newHashMap();
  for (VectorWrapper<?> wrapper : container) {
    ValueVector vector = wrapper.getValueVector();
    oldFields.put(vector.getField().getName(), vector);
  }
  VectorContainer newVectors = new VectorContainer();
  try {
    List<SerializedField> fields = def.getFieldList();
    int bufOffset = 0;
    for (SerializedField field : fields) {
      MaterializedField fieldDef = MaterializedField.create(field);
      ValueVector vector = oldFields.remove(fieldDef.getName());
      if (vector == null) {
        // Field did not exist previously: this is a schema change.
        schemaChanged = true;
        vector = TypeHelper.getNewVector(fieldDef, allocator);
      } else if (!vector.getField().getType().equals(fieldDef.getType())) {
        // Field had a different type before: this is a schema change.
        // Clear the previous vector.
        vector.clear();
        schemaChanged = true;
        vector = TypeHelper.getNewVector(fieldDef, allocator);
      } else if ((vector.getField().getType().getMinorType() == MinorType.MAP || vector.getField().getType().getMinorType() == MinorType.DICT) && !isSameSchema(vector.getField().getChildren(), field.getChildList())) {
        // The field is a map or a dict and its child schema changed.
        // Discard the old vector and create a new one.
        schemaChanged = true;
        vector.clear();
        vector = TypeHelper.getNewVector(fieldDef, allocator);
      }
      // Load the vector.
      if (buf == null) {
        // The field metadata alone is sufficient to load the vector.
        if (vector instanceof UntypedNullVector) {
          vector.load(field, null);
        }
      } else if (field.getValueCount() == 0) {
        // Schema only.
        AllocationHelper.allocate(vector, 0, 0, 0);
      } else {
        vector.load(field, buf.slice(bufOffset, field.getBufferLength()));
      }
      bufOffset += field.getBufferLength();
      newVectors.add(vector);
    }
    // Rebuild the schema.
    SchemaBuilder builder = BatchSchema.newBuilder();
    for (VectorWrapper<?> v : newVectors) {
      builder.addField(v.getField());
    }
    builder.setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
    schema = builder.build();
    newVectors.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    container = newVectors;
    container.setRecordCount(valueCount);
  } catch (final Throwable cause) {
    // Clean up the vectors created here and rethrow the actual cause;
    // the upper layer is responsible for invoking its own cleanup logic.
    VectorAccessibleUtilities.clear(newVectors);
    throw cause;
  } finally {
    if (!oldFields.isEmpty()) {
      schemaChanged = true;
      for (ValueVector vector : oldFields.values()) {
        vector.clear();
      }
    }
  }
  return schemaChanged;
}
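Stripped of Drill's vector plumbing, load() detects schema changes with a remove-as-you-go map: seed a map with the previous fields, remove each incoming field by name, and flag a change for any unmatched or retyped incoming field as well as for any leftover entry (a dropped field). The sketch below illustrates just that idiom, with name-to-type strings standing in for value vectors; the map/dict child-schema comparison from the real method is elided.

import java.util.HashMap;
import java.util.Map;

// Illustrative stand-in for the schema-diff idiom in load(); not Drill code.
public class SchemaDiff {

  // previous and incoming each map field name -> type name for a batch.
  public static boolean schemaChanged(Map<String, String> previous, Map<String, String> incoming) {
    Map<String, String> oldFields = new HashMap<>(previous);
    boolean changed = false;
    for (Map.Entry<String, String> field : incoming.entrySet()) {
      String oldType = oldFields.remove(field.getKey());
      if (oldType == null || !oldType.equals(field.getValue())) {
        changed = true; // added or retyped field, as in load()
      }
    }
    return changed || !oldFields.isEmpty(); // leftover names were dropped fields
  }
}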
Use of org.apache.drill.exec.vector.UntypedNullVector in project drill by apache: the class HashAggBatch, method createAggregatorInternal.
protected HashAggregator createAggregatorInternal() {
  CodeGenerator<HashAggregator> top = CodeGenerator.get(HashAggregator.TEMPLATE_DEFINITION, context.getOptions());
  ClassGenerator<HashAggregator> cg = top.getRoot();
  ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
  top.plainJavaCapable(true);
  // Uncomment the following line to allow debugging of the template code:
  // top.saveCodeForDebugging(true);
  container.clear();
  numGroupByExprs = (getKeyExpressions() != null) ? getKeyExpressions().size() : 0;
  numAggrExprs = (getValueExpressions() != null) ? getValueExpressions().size() : 0;
  aggrExprs = new LogicalExpression[numAggrExprs];
  groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
  aggrOutFieldIds = new TypedFieldId[numAggrExprs];
  ErrorCollector collector = new ErrorCollectorImpl();
  for (int i = 0; i < numGroupByExprs; i++) {
    NamedExpression ne = getKeyExpressions().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
    ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    // Add this group-by vector to the output container.
    groupByOutFieldIds[i] = container.add(vv);
    columnMapping.put(outputField.getName(), ne.getExpr().toString().replace('`', ' ').trim());
  }
  // Each of SUM, MAX and MIN gets an extra bigint column.
  int extraNonNullColumns = 0;
  for (int i = 0; i < numAggrExprs; i++) {
    NamedExpression ne = getValueExpressions().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
    }
    collector.reportErrors(logger);
    if (expr == null) {
      continue;
    }
    // Populate the complex writers for complex exprs.
    if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
      if (complexWriters == null) {
        complexWriters = new ArrayList<>();
      } else {
        complexWriters.clear();
      }
      // The reference name will be passed to ComplexWriter and used as the
      // name of the output vector from the writer.
      ((DrillFuncHolderExpr) expr).setFieldReference(ne.getRef());
      MaterializedField field = MaterializedField.create(ne.getRef().getAsNamePart().getName(), UntypedNullHolder.TYPE);
      container.add(new UntypedNullVector(field, container.getAllocator()));
      aggrExprs[i] = expr;
    } else {
      MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
      ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      aggrOutFieldIds[i] = container.add(vv);
      aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
      if (expr instanceof FunctionHolderExpression) {
        String funcName = ((FunctionHolderExpression) expr).getName();
        if (funcName.equals("sum") || funcName.equals("max") || funcName.equals("min")) {
          extraNonNullColumns++;
        }
        List<LogicalExpression> args = ((FunctionCall) ne.getExpr()).args();
        if (!args.isEmpty()) {
          if (args.get(0) instanceof SchemaPath) {
            columnMapping.put(outputField.getName(), ((SchemaPath) args.get(0)).getAsNamePart().getName());
          } else if (args.get(0) instanceof FunctionCall) {
            FunctionCall functionCall = (FunctionCall) args.get(0);
            if (functionCall.arg(0) instanceof SchemaPath) {
              columnMapping.put(outputField.getName(), ((SchemaPath) functionCall.arg(0)).getAsNamePart().getName());
            }
          }
        }
      } else {
        columnMapping.put(outputField.getName(), ne.getRef().getAsNamePart().getName());
      }
    }
  }
  setupUpdateAggrValues(cgInner);
  setupGetIndex(cg);
  cg.getBlock("resetValues")._return(JExpr.TRUE);
  container.buildSchema(SelectionVectorMode.NONE);
  HashAggregator agg = context.getImplementationClass(top);
  // TODO - fix the validator on this option
  HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      HashTable.DEFAULT_LOAD_FACTOR, getKeyExpressions(), null, /* no probe exprs */ comparators);
  agg.setup(popConfig, htConfig, context, oContext, incoming, this, aggrExprs, cgInner.getWorkspaceTypes(), cgInner, groupByOutFieldIds, this.container, extraNonNullColumns * 8);
  return agg;
}
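The extraNonNullColumns bookkeeping feeds the final argument of agg.setup(): SUM, MAX and MIN each carry an extra non-null bigint workspace column, and the operator passes extraNonNullColumns * 8 to the aggregator, presumably 8 bytes per row for each bigint. A minimal sketch of that count, with plain strings standing in for FunctionHolderExpression.getName() and the byte-width factor treated as an assumption:

import java.util.Arrays;
import java.util.List;

// Illustrative sketch of the sizing logic above; not Drill code.
public class AggColumnSizing {
  // Assumption: the factor of 8 in extraNonNullColumns * 8 is the bigint byte width.
  private static final int BIGINT_WIDTH = 8;

  public static int extraBytesPerRow(List<String> aggFunctionNames) {
    int extraNonNullColumns = 0;
    for (String name : aggFunctionNames) {
      // Same predicate as the method above: only sum, max and min count.
      if (name.equals("sum") || name.equals("max") || name.equals("min")) {
        extraNonNullColumns++;
      }
    }
    return extraNonNullColumns * BIGINT_WIDTH;
  }

  public static void main(String[] args) {
    // e.g. SELECT sum(a), min(b), count(c): two extra bigint columns, 16 bytes per row
    System.out.println(extraBytesPerRow(Arrays.asList("sum", "min", "count")));
  }
}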