Use of org.apache.drill.exec.physical.impl.common.HashTableConfig in project drill by axbaretto.
From class HashAggBatch, method createAggregatorInternal:
private HashAggregator createAggregatorInternal() throws SchemaChangeException, ClassTransformationException, IOException {
  CodeGenerator<HashAggregator> top = CodeGenerator.get(HashAggregator.TEMPLATE_DEFINITION, context.getOptions());
  ClassGenerator<HashAggregator> cg = top.getRoot();
  ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
  top.plainJavaCapable(true);

  container.clear();

  int numGroupByExprs = (popConfig.getGroupByExprs() != null) ? popConfig.getGroupByExprs().size() : 0;
  int numAggrExprs = (popConfig.getAggrExprs() != null) ? popConfig.getAggrExprs().size() : 0;
  aggrExprs = new LogicalExpression[numAggrExprs];
  groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
  aggrOutFieldIds = new TypedFieldId[numAggrExprs];

  ErrorCollector collector = new ErrorCollectorImpl();

  int i;
  for (i = 0; i < numGroupByExprs; i++) {
    NamedExpression ne = popConfig.getGroupByExprs().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
    @SuppressWarnings("resource")
    ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    // add this group-by vector to the output container
    groupByOutFieldIds[i] = container.add(vv);
  }

  // each of SUM, MAX and MIN gets an extra bigint column
  int extraNonNullColumns = 0;
  for (i = 0; i < numAggrExprs; i++) {
    NamedExpression ne = popConfig.getAggrExprs().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
    }
    if (collector.hasErrors()) {
      throw new SchemaChangeException("Failure while materializing expression. " + collector.toErrorString());
    }
    if (expr == null) {
      continue;
    }
    if (expr instanceof FunctionHolderExpression) {
      String funcName = ((FunctionHolderExpression) expr).getName();
      if (funcName.equals("sum") || funcName.equals("max") || funcName.equals("min")) {
        extraNonNullColumns++;
      }
    }
    final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
    @SuppressWarnings("resource")
    ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    aggrOutFieldIds[i] = container.add(vv);
    aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
  }

  setupUpdateAggrValues(cgInner);
  setupGetIndex(cg);
  cg.getBlock("resetValues")._return(JExpr.TRUE);

  container.buildSchema(SelectionVectorMode.NONE);
  HashAggregator agg = context.getImplementationClass(top);

  // TODO - fix the validator on this option
  HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      HashTable.DEFAULT_LOAD_FACTOR,
      popConfig.getGroupByExprs(),
      null, /* no probe exprs */
      comparators);

  agg.setup(popConfig, htConfig, context, oContext, incoming, this, aggrExprs, cgInner.getWorkspaceTypes(), groupByOutFieldIds, this.container, extraNonNullColumns * 8);
  return agg;
}
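For reference, the HashTableConfig construction at the end of this method can be read in isolation. The sketch below is not part of the original class: the helper name and its parameters are hypothetical stand-ins for the operator's fields, assuming the same FragmentContext API used above. It annotates each argument of the five-argument constructor used on the aggregation path:

  import java.util.List;
  import org.apache.drill.common.logical.data.NamedExpression;
  import org.apache.drill.exec.ExecConstants;
  import org.apache.drill.exec.ops.FragmentContext;
  import org.apache.drill.exec.physical.impl.common.Comparator;
  import org.apache.drill.exec.physical.impl.common.HashTable;
  import org.apache.drill.exec.physical.impl.common.HashTableConfig;

  // Hypothetical helper isolating the construction shown above.
  static HashTableConfig aggHashTableConfig(FragmentContext context,
      List<NamedExpression> groupByExprs, List<Comparator> comparators) {
    return new HashTableConfig(
        // initial table size, taken from the minimum-hash-table-size exec option
        (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
        HashTable.DEFAULT_LOAD_FACTOR, // load factor that triggers resizing
        groupByExprs,                  // build-side key expressions (the GROUP BY keys)
        null,                          // no probe-side expressions for an aggregate
        comparators);                  // per-key comparison semantics
  }

Passing null for the probe-side expressions is what distinguishes the aggregation use from the join use shown next.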
Use of org.apache.drill.exec.physical.impl.common.HashTableConfig in project drill by axbaretto.
From class HashJoinBatch, method setupHashTable:
public void setupHashTable() throws IOException, SchemaChangeException, ClassTransformationException {
  // Setup the hash table configuration object
  int conditionsSize = conditions.size();
  final List<NamedExpression> rightExpr = new ArrayList<>(conditionsSize);
  List<NamedExpression> leftExpr = new ArrayList<>(conditionsSize);

  // Create named expressions from the conditions
  for (int i = 0; i < conditionsSize; i++) {
    rightExpr.add(new NamedExpression(conditions.get(i).getRight(), new FieldReference("build_side_" + i)));
    leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
  }

  // Set the left named expression to be null if the probe batch is empty.
  if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
    leftExpr = null;
  } else if (left.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
    final String errorMsg = "Hash join does not support probe batch with selection vectors. "
        + "Probe batch has selection mode = " + left.getSchema().getSelectionVectorMode();
    throw new SchemaChangeException(errorMsg);
  }

  final HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators);

  // Create the chained hash table
  final ChainedHashTable ht = new ChainedHashTable(htConfig, context, oContext.getAllocator(), this.right, this.left, null);
  hashTable = ht.createAndSetupHashTable(null, 1);
}
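Both axbaretto snippets reference a comparators list that is populated outside the quoted methods; the apache variant below builds it inline from the join conditions. Unrolled into a plain loop (a hypothetical helper, assuming JoinCondition is the element type of conditions), the construction amounts to:

  import java.util.ArrayList;
  import java.util.List;
  import org.apache.drill.common.logical.data.JoinCondition;
  import org.apache.drill.exec.physical.impl.common.Comparator;
  import org.apache.drill.exec.physical.impl.join.JoinUtils;

  // One Comparator per join condition, e.g. EQUALS for "==" and
  // IS_NOT_DISTINCT_FROM for null-safe equality.
  static List<Comparator> buildComparators(List<JoinCondition> conditions) {
    List<Comparator> comparators = new ArrayList<>(conditions.size());
    for (JoinCondition cond : conditions) {
      comparators.add(JoinUtils.checkAndReturnSupportedJoinComparator(cond));
    }
    return comparators;
  }

JoinUtils.checkAndReturnSupportedJoinComparator rejects conditions the hash table cannot evaluate, so every entry that reaches HashTableConfig is supported.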
Use of org.apache.drill.exec.physical.impl.common.HashTableConfig in project drill by apache.
From class HashJoinBatch, method setupHashTable:
private void setupHashTable() {
  List<Comparator> comparators = Lists.newArrayListWithExpectedSize(conditions.size());
  conditions.forEach(cond -> comparators.add(JoinUtils.checkAndReturnSupportedJoinComparator(cond)));

  if (skipHashTableBuild) {
    return;
  }

  // Setup the hash table configuration object
  List<NamedExpression> leftExpr = new ArrayList<>(conditions.size());

  // Create named expressions from the conditions
  for (int i = 0; i < conditions.size(); i++) {
    leftExpr.add(new NamedExpression(conditions.get(i).getLeft(), new FieldReference("probe_side_" + i)));
  }

  // Set the left named expression to be null if the probe batch is empty.
  if (leftUpstream != IterOutcome.OK_NEW_SCHEMA && leftUpstream != IterOutcome.OK) {
    leftExpr = null;
  } else if (probeBatch.getSchema().getSelectionVectorMode() != BatchSchema.SelectionVectorMode.NONE) {
    throw UserException.internalError(null)
        .message("Hash join does not support probe batch with selection vectors.")
        .addContext("Probe batch has selection mode", probeBatch.getSchema().getSelectionVectorMode().toString())
        .build(logger);
  }

  // rightExpr (the build-side key expressions) is a field populated outside this method.
  HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      true, HashTable.DEFAULT_LOAD_FACTOR, rightExpr, leftExpr, comparators, joinControl.asInt());

  // Create the chained hash table
  baseHashTable = new ChainedHashTable(htConfig, context, allocator, buildBatch, probeBatch, null);
  if (enableRuntimeFilter) {
    setupHash64(htConfig);
  }
}
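Compared with the five-argument constructor in the axbaretto snippets, this call passes two extra arguments. The sketch below lines them up; it is a hypothetical wrapper, and the meaning of the boolean is inferred from context rather than confirmed by this page, so treat that comment as an assumption:

  import java.util.List;
  import org.apache.drill.common.logical.data.NamedExpression;
  import org.apache.drill.exec.physical.impl.common.Comparator;
  import org.apache.drill.exec.physical.impl.common.HashTable;
  import org.apache.drill.exec.physical.impl.common.HashTableConfig;

  // Hypothetical wrapper; parameter names are placeholders.
  static HashTableConfig joinHashTableConfig(int initialCapacity,
      List<NamedExpression> buildExprs, List<NamedExpression> probeExprs,
      List<Comparator> comparators, int joinControlCode) {
    return new HashTableConfig(
        initialCapacity,
        true,                          // presumably marks the initial size as final (assumption)
        HashTable.DEFAULT_LOAD_FACTOR,
        buildExprs,                    // build-side keys (rightExpr above)
        probeExprs,                    // probe-side keys, or null if the probe is empty
        comparators,
        joinControlCode);              // joinControl.asInt(): special join-match semantics
  }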
Use of org.apache.drill.exec.physical.impl.common.HashTableConfig in project drill by apache.
From class HashAggBatch, method createAggregatorInternal:
protected HashAggregator createAggregatorInternal() {
  CodeGenerator<HashAggregator> top = CodeGenerator.get(HashAggregator.TEMPLATE_DEFINITION, context.getOptions());
  ClassGenerator<HashAggregator> cg = top.getRoot();
  ClassGenerator<HashAggregator> cgInner = cg.getInnerGenerator("BatchHolder");
  top.plainJavaCapable(true);
  // Uncomment the following line to allow debugging of the template code
  // top.saveCodeForDebugging(true);

  container.clear();

  numGroupByExprs = (getKeyExpressions() != null) ? getKeyExpressions().size() : 0;
  numAggrExprs = (getValueExpressions() != null) ? getValueExpressions().size() : 0;
  aggrExprs = new LogicalExpression[numAggrExprs];
  groupByOutFieldIds = new TypedFieldId[numGroupByExprs];
  aggrOutFieldIds = new TypedFieldId[numAggrExprs];

  ErrorCollector collector = new ErrorCollectorImpl();

  for (int i = 0; i < numGroupByExprs; i++) {
    NamedExpression ne = getKeyExpressions().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
    ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    // add this group-by vector to the output container
    groupByOutFieldIds[i] = container.add(vv);
    columnMapping.put(outputField.getName(), ne.getExpr().toString().replace('`', ' ').trim());
  }

  // each of SUM, MAX and MIN gets an extra bigint column
  int extraNonNullColumns = 0;
  for (int i = 0; i < numAggrExprs; i++) {
    NamedExpression ne = getValueExpressions().get(i);
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(new UnsupportedOperationException("Union type not supported in aggregate functions")).build(logger);
    }
    collector.reportErrors(logger);
    if (expr == null) {
      continue;
    }

    // Populate the complex writers for complex exprs
    if (expr instanceof DrillFuncHolderExpr && ((DrillFuncHolderExpr) expr).getHolder().isComplexWriterFuncHolder()) {
      if (complexWriters == null) {
        complexWriters = new ArrayList<>();
      } else {
        complexWriters.clear();
      }
      // The reference name will be passed to ComplexWriter, used as the name of the output vector from the writer.
      ((DrillFuncHolderExpr) expr).setFieldReference(ne.getRef());
      MaterializedField field = MaterializedField.create(ne.getRef().getAsNamePart().getName(), UntypedNullHolder.TYPE);
      container.add(new UntypedNullVector(field, container.getAllocator()));
      aggrExprs[i] = expr;
    } else {
      MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), expr.getMajorType());
      ValueVector vv = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      aggrOutFieldIds[i] = container.add(vv);
      aggrExprs[i] = new ValueVectorWriteExpression(aggrOutFieldIds[i], expr, true);
      if (expr instanceof FunctionHolderExpression) {
        String funcName = ((FunctionHolderExpression) expr).getName();
        if (funcName.equals("sum") || funcName.equals("max") || funcName.equals("min")) {
          extraNonNullColumns++;
        }
        List<LogicalExpression> args = ((FunctionCall) ne.getExpr()).args();
        if (!args.isEmpty()) {
          if (args.get(0) instanceof SchemaPath) {
            columnMapping.put(outputField.getName(), ((SchemaPath) args.get(0)).getAsNamePart().getName());
          } else if (args.get(0) instanceof FunctionCall) {
            FunctionCall functionCall = (FunctionCall) args.get(0);
            if (functionCall.arg(0) instanceof SchemaPath) {
              columnMapping.put(outputField.getName(), ((SchemaPath) functionCall.arg(0)).getAsNamePart().getName());
            }
          }
        }
      } else {
        columnMapping.put(outputField.getName(), ne.getRef().getAsNamePart().getName());
      }
    }
  }

  setupUpdateAggrValues(cgInner);
  setupGetIndex(cg);
  cg.getBlock("resetValues")._return(JExpr.TRUE);

  container.buildSchema(SelectionVectorMode.NONE);
  HashAggregator agg = context.getImplementationClass(top);

  // TODO - fix the validator on this option
  HashTableConfig htConfig = new HashTableConfig(
      (int) context.getOptions().getOption(ExecConstants.MIN_HASH_TABLE_SIZE),
      HashTable.DEFAULT_LOAD_FACTOR,
      getKeyExpressions(),
      null, /* no probe exprs */
      comparators);

  agg.setup(popConfig, htConfig, context, oContext, incoming, this, aggrExprs, cgInner.getWorkspaceTypes(), cgInner, groupByOutFieldIds, this.container, extraNonNullColumns * 8);
  return agg;
}
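This variant also maintains a columnMapping from each output column to the input column it derives from; for group-by keys it stores the expression's string form with Drill's backquotes stripped (the columnMapping.put call in the first loop). A tiny standalone illustration with an invented sample value:

  // For a key expression that prints as `dept_id`, the mapping strips the
  // backquotes that LogicalExpression.toString() places around names.
  public class ColumnMappingDemo {
    public static void main(String[] args) {
      String exprText = "`dept_id`";                      // stands in for ne.getExpr().toString()
      String mapped = exprText.replace('`', ' ').trim();  // same transform as in the method above
      System.out.println(mapped);                         // prints: dept_id
    }
  }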