use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class SortGroupByPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
List<LogicalVariable> gbyCols = getGbyColumns();
int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
GroupByOperator gby = (GroupByOperator) op;
int numFds = gby.getDecorList().size();
int[] fdColumns = new int[numFds];
int j = 0;
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
throw new AlgebricksException("Sort group-by expects variable references.");
}
VariableReferenceExpression v = (VariableReferenceExpression) expr;
LogicalVariable decor = v.getVariableReference();
fdColumns[j++] = inputSchemas[0].findVariable(decor);
}
if (gby.getNestedPlans().size() != 1) {
throw new AlgebricksException("Sort group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
}
ILogicalPlan p0 = gby.getNestedPlans().get(0);
if (p0.getRoots().size() != 1) {
throw new AlgebricksException("Sort group-by currently works only for one nested plan with one root containing" + "an aggregate and a nested-tuple-source.");
}
Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
AggregateOperator aggOp = (AggregateOperator) r0.getValue();
IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
List<Object> intermediateTypes = new ArrayList<Object>();
int n = aggOp.getExpressions().size();
IAggregateEvaluatorFactory[] aff = new IAggregateEvaluatorFactory[n];
int i = 0;
IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
aff[i++] = expressionRuntimeProvider.createAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
}
int[] keyAndDecFields = new int[keys.length + fdColumns.length];
for (i = 0; i < keys.length; ++i) {
keyAndDecFields[i] = keys[i];
}
for (i = 0; i < fdColumns.length; i++) {
keyAndDecFields[keys.length + i] = fdColumns[i];
}
List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
keyAndDecVariables.add(p.first);
}
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
}
for (LogicalVariable var : keyAndDecVariables) {
aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
}
compileSubplans(inputSchemas[0], gby, opSchema, context);
IOperatorDescriptorRegistry spec = builder.getJobSpec();
IBinaryComparatorFactory[] compFactories = new IBinaryComparatorFactory[gbyCols.size()];
IBinaryComparatorFactoryProvider bcfProvider = context.getBinaryComparatorFactoryProvider();
i = 0;
for (LogicalVariable v : gbyCols) {
Object type = aggOpInputEnv.getVarType(v);
if (orderColumns[i].getOrder() == OrderKind.ASC) {
compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, true);
} else {
compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, false);
}
i++;
}
RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
IAggregateEvaluatorFactory[] merges = new IAggregateEvaluatorFactory[n];
List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
localInputSchemas[0] = new OperatorSchemaImpl();
for (i = 0; i < n; i++) {
AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
aggFun.getUsedVariables(usedVars);
}
i = 0;
for (Object type : intermediateTypes) {
aggOpInputEnv.setVarType(usedVars.get(i++), type);
}
for (LogicalVariable keyVar : keyAndDecVariables) {
localInputSchemas[0].addVariable(keyVar);
}
for (LogicalVariable usedVar : usedVars) {
localInputSchemas[0].addVariable(usedVar);
}
for (i = 0; i < n; i++) {
AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
merges[i] = expressionRuntimeProvider.createAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
}
RecordDescriptor partialAggRecordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), localInputSchemas[0], context);
IAggregatorDescriptorFactory aggregatorFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(aff, keyAndDecFields);
IAggregatorDescriptorFactory mergeFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(merges, keyAndDecFields);
INormalizedKeyComputerFactory normalizedKeyFactory = null;
INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
if (nkcfProvider == null) {
normalizedKeyFactory = null;
}
Object type = aggOpInputEnv.getVarType(gbyCols.get(0));
normalizedKeyFactory = orderColumns[0].getOrder() == OrderKind.ASC ? nkcfProvider.getNormalizedKeyComputerFactory(type, true) : nkcfProvider.getNormalizedKeyComputerFactory(type, false);
SortGroupByOperatorDescriptor gbyOpDesc = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keyAndDecFields, normalizedKeyFactory, compFactories, aggregatorFactory, mergeFactory, partialAggRecordDescriptor, recordDescriptor, false);
contributeOpDesc(builder, gby, gbyOpDesc);
ILogicalOperator src = op.getInputs().get(0).getValue();
builder.contributeGraphEdge(src, 0, op, 0);
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class AbstractIntroduceGroupByCombinerRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
if (context.checkIfInDontApplySet(this, op)) {
return false;
}
if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
return false;
}
GroupByOperator gbyOp = (GroupByOperator) op;
ExecutionMode executionMode = gbyOp.getExecutionMode();
if (executionMode != ExecutionMode.PARTITIONED && !(executionMode == ExecutionMode.UNPARTITIONED && gbyOp.isGroupAll())) {
return false;
}
BookkeepingInfo bi = new BookkeepingInfo();
GroupByOperator newGbyOp = opToPush(gbyOp, bi, context);
if (newGbyOp == null) {
return false;
}
Set<LogicalVariable> newGbyLiveVars = new ListSet<LogicalVariable>();
VariableUtilities.getLiveVariables(newGbyOp, newGbyLiveVars);
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyOp.getDecorList()) {
List<LogicalVariable> usedDecorVars = new ArrayList<LogicalVariable>();
// p.second.getValue() should always return a VariableReferenceExpression, hence
// usedDecorVars should always contain only one variable.
p.second.getValue().getUsedVariables(usedDecorVars);
if (!newGbyLiveVars.contains(usedDecorVars.get(0))) {
// Let the left-hand side of gbyOp's decoration expressions populated through the combiner group-by without
// any intermediate assignment.
newGbyOp.addDecorExpression(null, p.second.getValue());
}
}
newGbyOp.setExecutionMode(ExecutionMode.LOCAL);
Object v = gbyOp.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY);
newGbyOp.getAnnotations().put(OperatorAnnotations.USE_HASH_GROUP_BY, v);
Object v2 = gbyOp.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY);
newGbyOp.getAnnotations().put(OperatorAnnotations.USE_EXTERNAL_GROUP_BY, v2);
List<LogicalVariable> propagatedVars = new LinkedList<LogicalVariable>();
VariableUtilities.getProducedVariables(newGbyOp, propagatedVars);
Set<LogicalVariable> freeVars = new HashSet<LogicalVariable>();
OperatorPropertiesUtil.getFreeVariablesInSubplans(gbyOp, freeVars);
for (LogicalVariable var : freeVars) {
if (!propagatedVars.contains(var)) {
LogicalVariable newDecorVar = context.newVar();
newGbyOp.addDecorExpression(newDecorVar, new VariableReferenceExpression(var));
VariableUtilities.substituteVariables(gbyOp.getNestedPlans().get(0).getRoots().get(0).getValue(), var, newDecorVar, context);
}
}
Mutable<ILogicalOperator> opRef3 = gbyOp.getInputs().get(0);
opRef3.setValue(newGbyOp);
typeGby(newGbyOp, context);
typeGby(gbyOp, context);
context.addToDontApplySet(this, op);
return true;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class AbstractIntroduceGroupByCombinerRule method opToPush.
private GroupByOperator opToPush(GroupByOperator gbyOp, BookkeepingInfo bi, IOptimizationContext context) throws AlgebricksException {
// Hook up input to new group-by.
Mutable<ILogicalOperator> opRef3 = gbyOp.getInputs().get(0);
ILogicalOperator op3 = opRef3.getValue();
GroupByOperator newGbyOp = new GroupByOperator();
newGbyOp.getInputs().add(new MutableObject<ILogicalOperator>(op3));
// Copy annotations.
Map<String, Object> annotations = newGbyOp.getAnnotations();
annotations.putAll(gbyOp.getAnnotations());
List<LogicalVariable> gbyVars = gbyOp.getGbyVarList();
// Backup nested plans since tryToPushSubplan(...) may mutate them.
List<ILogicalPlan> copiedNestedPlans = new ArrayList<>();
for (ILogicalPlan nestedPlan : gbyOp.getNestedPlans()) {
ILogicalPlan copiedNestedPlan = OperatorManipulationUtil.deepCopy(nestedPlan, gbyOp);
OperatorManipulationUtil.computeTypeEnvironment(copiedNestedPlan, context);
copiedNestedPlans.add(copiedNestedPlan);
}
for (ILogicalPlan p : gbyOp.getNestedPlans()) {
// NOTE: tryToPushSubplan(...) can mutate the nested subplan p.
Pair<Boolean, ILogicalPlan> bip = tryToPushSubplan(p, gbyOp, newGbyOp, bi, gbyVars, context);
if (!bip.first) {
// For now, if we cannot push everything, give up.
// Resets the group-by operator with backup nested plans.
gbyOp.getNestedPlans().clear();
gbyOp.getNestedPlans().addAll(copiedNestedPlans);
return null;
}
ILogicalPlan pushedSubplan = bip.second;
if (pushedSubplan != null) {
newGbyOp.getNestedPlans().add(pushedSubplan);
}
}
ArrayList<LogicalVariable> newOpGbyList = new ArrayList<LogicalVariable>();
ArrayList<LogicalVariable> replGbyList = new ArrayList<LogicalVariable>();
// Find maximal sequence of variable.
for (Map.Entry<GroupByOperator, List<LogicalVariable>> e : bi.modifyGbyMap.entrySet()) {
List<LogicalVariable> varList = e.getValue();
boolean see1 = true;
int sz1 = newOpGbyList.size();
int i = 0;
for (LogicalVariable v : varList) {
if (see1) {
if (i < sz1) {
LogicalVariable v2 = newOpGbyList.get(i);
if (v != v2) {
// cannot linearize
return null;
}
} else {
see1 = false;
newOpGbyList.add(v);
replGbyList.add(context.newVar());
}
i++;
} else {
newOpGbyList.add(v);
replGbyList.add(context.newVar());
}
}
}
// set the vars in the new op
int n = newOpGbyList.size();
for (int i = 0; i < n; i++) {
newGbyOp.addGbyExpression(replGbyList.get(i), new VariableReferenceExpression(newOpGbyList.get(i)));
VariableUtilities.substituteVariables(gbyOp, newOpGbyList.get(i), replGbyList.get(i), false, context);
}
// Sets the global flag to be false.
newGbyOp.setGlobal(false);
// Sets the group all flag.
newGbyOp.setGroupAll(gbyOp.isGroupAll());
return newGbyOp;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class InlineAssignIntoAggregateRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
return false;
}
boolean changed = false;
GroupByOperator gbyOp = (GroupByOperator) op;
for (ILogicalPlan p : gbyOp.getNestedPlans()) {
for (Mutable<ILogicalOperator> r : p.getRoots()) {
if (inlined(r)) {
changed = true;
}
}
}
return changed;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class EliminateGroupByEmptyKeyRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
return false;
}
GroupByOperator groupOp = (GroupByOperator) op;
// Only groupAll has equivalent semantics to aggregate.
if (!groupOp.isGroupAll()) {
return false;
}
List<LogicalVariable> groupVars = groupOp.getGbyVarList();
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorList = groupOp.getDecorList();
if (!groupVars.isEmpty() || !decorList.isEmpty()) {
return false;
}
List<ILogicalPlan> nestedPlans = groupOp.getNestedPlans();
if (nestedPlans.size() > 1) {
return false;
}
ILogicalPlan nestedPlan = nestedPlans.get(0);
if (nestedPlan.getRoots().size() > 1) {
return false;
}
Mutable<ILogicalOperator> topOpRef = nestedPlan.getRoots().get(0);
ILogicalOperator topOp = nestedPlan.getRoots().get(0).getValue();
Mutable<ILogicalOperator> nestedTupleSourceRef = getNestedTupleSourceReference(topOpRef);
/**
* connect nested top op into the plan
*/
opRef.setValue(topOp);
/**
* connect child op into the plan
*/
nestedTupleSourceRef.setValue(groupOp.getInputs().get(0).getValue());
return true;
}
Aggregations