Search in sources :

Example 36 with AggregateOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator in project asterixdb by apache.

the class SortGroupByPOperator method contributeRuntimeOperator.

/**
 * Builds a {@link SortGroupByOperatorDescriptor} for the given group-by operator and contributes it,
 * together with the edge from the operator's first input, to the job builder.
 *
 * @param builder         job builder that receives the operator descriptor and the graph edge
 * @param context         job generation context supplying type environments and factory providers
 * @param op              the logical operator; it is cast to {@link GroupByOperator}
 * @param opSchema        output schema of {@code op}
 * @param inputSchemas    input schemas; only {@code inputSchemas[0]} is used to resolve field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException if a decor expression is not a variable reference, or if the group-by does not
 *                             have exactly one nested plan with exactly one root
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final String nestedPlanShapeError = "Sort group-by currently works only for one nested plan with one root containing an aggregate and a nested-tuple-source.";
    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;
    // Resolve every decor expression to its field index in the input schema.
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            throw new AlgebricksException("Sort group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException(nestedPlanShapeError);
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException(nestedPlanShapeError);
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    IAggregateEvaluatorFactory[] aff = new IAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    // One evaluator factory and one partial-aggregation type per aggregate expression.
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }
    // Key fields first, decor fields after them.
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    System.arraycopy(keys, 0, keyAndDecFields, 0, keys.length);
    System.arraycopy(fdColumns, 0, keyAndDecFields, keys.length, fdColumns.length);
    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }
    // Copy the output types of the key and decor variables into the aggregate's input environment.
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }
    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    // One comparator per group-by column, ascending or descending according to orderColumns.
    IBinaryComparatorFactory[] compFactories = new IBinaryComparatorFactory[gbyCols.size()];
    IBinaryComparatorFactoryProvider bcfProvider = context.getBinaryComparatorFactoryProvider();
    i = 0;
    for (LogicalVariable v : gbyCols) {
        Object type = aggOpInputEnv.getVarType(v);
        boolean ascending = orderColumns[i].getOrder() == OrderKind.ASC;
        compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, ascending);
        i++;
    }
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    IAggregateEvaluatorFactory[] merges = new IAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    // Collect the variables read by the merge expressions, in expression order.
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    // Pair the i-th used variable with the i-th intermediate type.
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    // The local schema lists key/decor variables followed by the merge input variables.
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }
    RecordDescriptor partialAggRecordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), localInputSchemas[0], context);
    IAggregatorDescriptorFactory aggregatorFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(aff, keyAndDecFields);
    IAggregatorDescriptorFactory mergeFactory = new SimpleAlgebricksAccumulatingAggregatorFactory(merges, keyAndDecFields);
    // The normalized key factory stays null when the context has no provider; previously the provider
    // was dereferenced even after the null check, which raised a NullPointerException.
    // NOTE(review): assumes SortGroupByOperatorDescriptor accepts a null normalized key factory - confirm.
    INormalizedKeyComputerFactory normalizedKeyFactory = null;
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    if (nkcfProvider != null) {
        Object firstKeyType = aggOpInputEnv.getVarType(gbyCols.get(0));
        boolean firstKeyAscending = orderColumns[0].getOrder() == OrderKind.ASC;
        normalizedKeyFactory = nkcfProvider.getNormalizedKeyComputerFactory(firstKeyType, firstKeyAscending);
    }
    SortGroupByOperatorDescriptor gbyOpDesc = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keyAndDecFields, normalizedKeyFactory, compFactories, aggregatorFactory, mergeFactory, partialAggRecordDescriptor, recordDescriptor, false);
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) ArrayList(java.util.ArrayList) SimpleAlgebricksAccumulatingAggregatorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.SimpleAlgebricksAccumulatingAggregatorFactory) IAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory) IBinaryComparatorFactoryProvider(org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IPartialAggregationTypeComputer(org.apache.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) OperatorSchemaImpl(org.apache.hyracks.algebricks.core.jobgen.impl.OperatorSchemaImpl) IAggregatorDescriptorFactory(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) 
INormalizedKeyComputerFactoryProvider(org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) SortGroupByOperatorDescriptor(org.apache.hyracks.dataflow.std.group.sort.SortGroupByOperatorDescriptor)

Example 37 with AggregateOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator in project asterixdb by apache.

the class InlineAssignIntoAggregateRule method inlined.

/**
 * Inlines an assign operator into the aggregate operator directly above it.
 *
 * The rewrite applies only when {@code r} holds an AGGREGATE operator whose first input is an ASSIGN
 * operator. Each aggregate expression is visited with a substitution built from the assign's variables
 * and expressions, and the aggregate is then re-wired to read from the assign's first input.
 *
 * @param r reference to the candidate aggregate operator
 * @return {@code true} if the operator pair matched and the plan was rewritten, {@code false} otherwise
 * @throws AlgebricksException if visiting an expression fails
 */
private boolean inlined(Mutable<ILogicalOperator> r) throws AlgebricksException {
    AbstractLogicalOperator upper = (AbstractLogicalOperator) r.getValue();
    if (upper.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
        return false;
    }
    AbstractLogicalOperator lower = (AbstractLogicalOperator) upper.getInputs().get(0).getValue();
    if (lower.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return false;
    }
    AggregateOperator aggregate = (AggregateOperator) upper;
    AssignOperator assignment = (AssignOperator) lower;
    VarExprSubstitution substitution = new VarExprSubstitution(assignment.getVariables(), assignment.getExpressions());
    for (Mutable<ILogicalExpression> aggExprRef : aggregate.getExpressions()) {
        Pair<Boolean, ILogicalExpression> outcome = aggExprRef.getValue().accept(substitution, null);
        // Only replace the expression when the visitor reports a change.
        if (outcome.first) {
            aggExprRef.setValue(outcome.second);
        }
    }
    // Bypass the assign: the aggregate now reads from whatever fed the assign.
    List<Mutable<ILogicalOperator>> aggregateInputs = upper.getInputs();
    aggregateInputs.clear();
    aggregateInputs.add(lower.getInputs().get(0));
    return true;
}
Also used : Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator)

Example 38 with AggregateOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator in project asterixdb by apache.

the class CancelUnnestWithNestedListifyRule method applies.

/**
 * Tries to cancel an unnest that sits directly on top of a group-by whose single nested plan ends in a
 * one-variable {@code listify} aggregate producing the unnested variable.
 *
 * When the pattern matches and the unnest has no positional variable, the group-by is removed and its
 * nested pipeline is spliced into the main plan. When a positional variable exists, the group-by is kept
 * and the aggregate at the root of its nested plan is replaced by a running aggregate that produces the
 * positional variable.
 *
 * @param opRef        reference to the candidate unnest operator; its value is replaced on success
 * @param varUsedAbove variables used above the operator; the rule does not fire if the unnested
 *                     variable is among them
 * @param context      optimization context used to recompute type environments
 * @return {@code true} if the plan was rewritten, {@code false} if the pattern did not match
 * @throws AlgebricksException if recomputing a type environment fails
 */
private boolean applies(Mutable<ILogicalOperator> opRef, Set<LogicalVariable> varUsedAbove, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue();
    if (op1.getOperatorTag() != LogicalOperatorTag.UNNEST) {
        return false;
    }
    UnnestOperator unnest1 = (UnnestOperator) op1;
    ILogicalExpression expr = unnest1.getExpressionRef().getValue();
    // The unnest expression must be either a variable or scan-collection applied to a variable.
    LogicalVariable unnestedVar;
    switch(expr.getExpressionTag()) {
        case VARIABLE:
            unnestedVar = ((VariableReferenceExpression) expr).getVariableReference();
            break;
        case FUNCTION_CALL:
            AbstractFunctionCallExpression functionCall = (AbstractFunctionCallExpression) expr;
            // Compare by equals(), consistent with the LISTIFY check below, rather than by reference.
            if (!BuiltinFunctions.SCAN_COLLECTION.equals(functionCall.getFunctionIdentifier())) {
                return false;
            }
            ILogicalExpression functionCallArgExpr = functionCall.getArguments().get(0).getValue();
            if (functionCallArgExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
                return false;
            }
            unnestedVar = ((VariableReferenceExpression) functionCallArgExpr).getVariableReference();
            break;
        default:
            return false;
    }
    if (varUsedAbove.contains(unnestedVar)) {
        return false;
    }
    Mutable<ILogicalOperator> opRef2 = op1.getInputs().get(0);
    AbstractLogicalOperator r = (AbstractLogicalOperator) opRef2.getValue();
    if (r.getOperatorTag() != LogicalOperatorTag.GROUP) {
        return false;
    }
    // go inside of a group-by plan
    GroupByOperator gby = (GroupByOperator) r;
    if (gby.getNestedPlans().size() != 1) {
        return false;
    }
    if (gby.getNestedPlans().get(0).getRoots().size() != 1) {
        return false;
    }
    AbstractLogicalOperator nestedPlanRoot = (AbstractLogicalOperator) gby.getNestedPlans().get(0).getRoots().get(0).getValue();
    if (nestedPlanRoot.getOperatorTag() != LogicalOperatorTag.AGGREGATE) {
        return false;
    }
    AggregateOperator agg = (AggregateOperator) nestedPlanRoot;
    Mutable<ILogicalOperator> aggInputOpRef = agg.getInputs().get(0);
    // Exactly one aggregate variable is required; this also guards the get(0) calls below
    // against an aggregate with no variables.
    if (agg.getVariables().size() != 1) {
        return false;
    }
    if (OperatorManipulationUtil.ancestorOfOperators(agg, ImmutableSet.of(LogicalOperatorTag.LIMIT, LogicalOperatorTag.ORDER, LogicalOperatorTag.GROUP, LogicalOperatorTag.DISTINCT))) {
        return false;
    }
    LogicalVariable aggVar = agg.getVariables().get(0);
    ILogicalExpression aggFun = agg.getExpressions().get(0).getValue();
    if (!aggVar.equals(unnestedVar) || ((AbstractLogicalExpression) aggFun).getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
        return false;
    }
    AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) aggFun;
    if (!BuiltinFunctions.LISTIFY.equals(f.getFunctionIdentifier())) {
        return false;
    }
    if (f.getArguments().size() != 1) {
        return false;
    }
    ILogicalExpression arg0 = f.getArguments().get(0).getValue();
    if (((AbstractLogicalExpression) arg0).getExpressionTag() != LogicalExpressionTag.VARIABLE) {
        return false;
    }
    // Pattern matched: the unnest variable can be assigned directly from the listify argument.
    LogicalVariable paramVar = ((VariableReferenceExpression) arg0).getVariableReference();
    ArrayList<LogicalVariable> assgnVars = new ArrayList<LogicalVariable>(1);
    assgnVars.add(unnest1.getVariable());
    ArrayList<Mutable<ILogicalExpression>> assgnExprs = new ArrayList<Mutable<ILogicalExpression>>(1);
    assgnExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(paramVar)));
    AssignOperator assign = new AssignOperator(assgnVars, assgnExprs);
    LogicalVariable posVar = unnest1.getPositionalVariable();
    if (posVar == null) {
        // Creates assignment for group-by keys.
        ArrayList<LogicalVariable> gbyKeyAssgnVars = new ArrayList<LogicalVariable>();
        ArrayList<Mutable<ILogicalExpression>> gbyKeyAssgnExprs = new ArrayList<Mutable<ILogicalExpression>>();
        for (int i = 0; i < gby.getGroupByList().size(); i++) {
            if (gby.getGroupByList().get(i).first != null) {
                gbyKeyAssgnVars.add(gby.getGroupByList().get(i).first);
                gbyKeyAssgnExprs.add(gby.getGroupByList().get(i).second);
            }
        }
        // Moves the nested pipeline before aggregation out of the group-by op.
        // Walk down first inputs until the nested-tuple-source at the bottom of the nested plan.
        Mutable<ILogicalOperator> bottomOpRef = aggInputOpRef;
        AbstractLogicalOperator bottomOp = (AbstractLogicalOperator) bottomOpRef.getValue();
        while (bottomOp.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) {
            bottomOpRef = bottomOp.getInputs().get(0);
            bottomOp = (AbstractLogicalOperator) bottomOpRef.getValue();
        }
        // Removes the group-by operator.
        opRef.setValue(assign);
        assign.getInputs().add(aggInputOpRef);
        // The nested-tuple-source is replaced by the key assignment, which reads the group-by's input.
        AssignOperator gbyKeyAssign = new AssignOperator(gbyKeyAssgnVars, gbyKeyAssgnExprs);
        gbyKeyAssign.getInputs().add(gby.getInputs().get(0));
        bottomOpRef.setValue(gbyKeyAssign);
        context.computeAndSetTypeEnvironmentForOperator(gbyKeyAssign);
        context.computeAndSetTypeEnvironmentForOperator(assign);
    } else {
        // if positional variable is used in unnest, the unnest will be pushed into the group-by as a running-aggregate
        // First create assign for the unnest variable
        List<LogicalVariable> nestedAssignVars = new ArrayList<LogicalVariable>();
        List<Mutable<ILogicalExpression>> nestedAssignExprs = new ArrayList<Mutable<ILogicalExpression>>();
        nestedAssignVars.add(unnest1.getVariable());
        nestedAssignExprs.add(new MutableObject<ILogicalExpression>(arg0));
        AssignOperator nestedAssign = new AssignOperator(nestedAssignVars, nestedAssignExprs);
        nestedAssign.getInputs().add(opRef2);
        // Then create running aggregation for the positional variable
        List<LogicalVariable> raggVars = new ArrayList<LogicalVariable>();
        List<Mutable<ILogicalExpression>> raggExprs = new ArrayList<Mutable<ILogicalExpression>>();
        raggVars.add(posVar);
        StatefulFunctionCallExpression fce = new StatefulFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.TID), UnpartitionedPropertyComputer.INSTANCE);
        raggExprs.add(new MutableObject<ILogicalExpression>(fce));
        RunningAggregateOperator raggOp = new RunningAggregateOperator(raggVars, raggExprs);
        raggOp.setExecutionMode(unnest1.getExecutionMode());
        RunningAggregatePOperator raggPOp = new RunningAggregatePOperator();
        raggOp.setPhysicalOperator(raggPOp);
        // The running aggregate takes over the aggregate's input and becomes the nested plan root.
        raggOp.getInputs().add(nestedPlanRoot.getInputs().get(0));
        gby.getNestedPlans().get(0).getRoots().set(0, new MutableObject<ILogicalOperator>(raggOp));
        opRef.setValue(nestedAssign);
        context.computeAndSetTypeEnvironmentForOperator(nestedAssign);
        context.computeAndSetTypeEnvironmentForOperator(raggOp);
        context.computeAndSetTypeEnvironmentForOperator(gby);
    }
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) AbstractLogicalExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression) StatefulFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) ArrayList(java.util.ArrayList) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) RunningAggregatePOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.RunningAggregatePOperator) UnnestOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) RunningAggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.RunningAggregateOperator) RunningAggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.RunningAggregateOperator)

Example 39 with AggregateOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator in project asterixdb by apache.

the class AggregatePOperator method computeDeliveredProperties.

/**
 * Sets {@code deliveredProperties} for an aggregate operator.
 *
 * An aggregate running in UNPARTITIONED mode delivers the unpartitioned property; in any other mode it
 * passes through the partitioning property delivered by its first input. In both cases the list of
 * local properties is empty.
 *
 * @param op      the logical operator; it is cast to {@link AggregateOperator}
 * @param context optimization context (not read by this method)
 */
@Override
public void computeDeliveredProperties(ILogicalOperator op, IOptimizationContext context) {
    AggregateOperator aggregate = (AggregateOperator) op;
    ILogicalOperator child = op.getInputs().get(0).getValue();
    if (aggregate.getExecutionMode() == AbstractLogicalOperator.ExecutionMode.UNPARTITIONED) {
        deliveredProperties = new StructuralPropertiesVector(IPartitioningProperty.UNPARTITIONED, new ArrayList<>());
    } else {
        // Inherit partitioning from the input.
        deliveredProperties = new StructuralPropertiesVector(child.getDeliveredPhysicalProperties().getPartitioningProperty(), new ArrayList<>());
    }
}
Also used : StructuralPropertiesVector(org.apache.hyracks.algebricks.core.algebra.properties.StructuralPropertiesVector) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) ArrayList(java.util.ArrayList)

Example 40 with AggregateOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator in project asterixdb by apache.

the class ExternalGroupByPOperator method contributeRuntimeOperator.

/**
 * Builds an {@link ExternalGroupOperatorDescriptor} for the given group-by operator and contributes it,
 * together with the edge from the operator's first input, to the job builder.
 *
 * @param builder         job builder that receives the operator descriptor and the graph edge
 * @param context         job generation context supplying type environments, factory providers and frame size
 * @param op              the logical operator; it is cast to {@link GroupByOperator}
 * @param opSchema        output schema of {@code op}
 * @param inputSchemas    input schemas; only {@code inputSchemas[0]} is used to resolve field indexes
 * @param outerPlanSchema not used by this method
 * @throws AlgebricksException if a decor expression is not a variable reference, or if the group-by does not
 *                             have exactly one nested plan with exactly one root
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    final String nestedPlanShapeError = "External group-by currently works only for one nested plan with one root containing an aggregate and a nested-tuple-source.";
    List<LogicalVariable> gbyCols = getGbyColumns();
    int[] keys = JobGenHelper.variablesToFieldIndexes(gbyCols, inputSchemas[0]);
    GroupByOperator gby = (GroupByOperator) op;
    // Resolve every decor expression to its field index in the input schema.
    int numFds = gby.getDecorList().size();
    int[] fdColumns = new int[numFds];
    int j = 0;
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
            // The message now names this operator instead of the pre-sorted group-by.
            throw new AlgebricksException("External group-by expects variable references.");
        }
        VariableReferenceExpression v = (VariableReferenceExpression) expr;
        LogicalVariable decor = v.getVariableReference();
        fdColumns[j++] = inputSchemas[0].findVariable(decor);
    }
    if (gby.getNestedPlans().size() != 1) {
        throw new AlgebricksException(nestedPlanShapeError);
    }
    ILogicalPlan p0 = gby.getNestedPlans().get(0);
    if (p0.getRoots().size() != 1) {
        throw new AlgebricksException(nestedPlanShapeError);
    }
    Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
    IPartialAggregationTypeComputer partialAggregationTypeComputer = context.getPartialAggregationTypeComputer();
    List<Object> intermediateTypes = new ArrayList<Object>();
    int n = aggOp.getExpressions().size();
    ISerializedAggregateEvaluatorFactory[] aff = new ISerializedAggregateEvaluatorFactory[n];
    int i = 0;
    IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
    IVariableTypeEnvironment aggOpInputEnv = context.getTypeEnvironment(aggOp.getInputs().get(0).getValue());
    IVariableTypeEnvironment outputEnv = context.getTypeEnvironment(op);
    // One serializable evaluator factory and one partial-aggregation type per aggregate expression.
    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) exprRef.getValue();
        aff[i++] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(aggFun, aggOpInputEnv, inputSchemas, context);
        intermediateTypes.add(partialAggregationTypeComputer.getType(aggFun, aggOpInputEnv, context.getMetadataProvider()));
    }
    // Key fields first, decor fields after them.
    int[] keyAndDecFields = new int[keys.length + fdColumns.length];
    System.arraycopy(keys, 0, keyAndDecFields, 0, keys.length);
    System.arraycopy(fdColumns, 0, keyAndDecFields, keys.length, fdColumns.length);
    List<LogicalVariable> keyAndDecVariables = new ArrayList<LogicalVariable>();
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) {
        keyAndDecVariables.add(p.first);
    }
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) {
        keyAndDecVariables.add(GroupByOperator.getDecorVariable(p));
    }
    // Copy the output types of the key and decor variables into the aggregate's input environment.
    for (LogicalVariable var : keyAndDecVariables) {
        aggOpInputEnv.setVarType(var, outputEnv.getVarType(var));
    }
    compileSubplans(inputSchemas[0], gby, opSchema, context);
    IOperatorDescriptorRegistry spec = builder.getJobSpec();
    IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(gbyCols, aggOpInputEnv, context);
    RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
    IBinaryHashFunctionFamily[] hashFunctionFactories = JobGenHelper.variablesToBinaryHashFunctionFamilies(gbyCols, aggOpInputEnv, context);
    ISerializedAggregateEvaluatorFactory[] merges = new ISerializedAggregateEvaluatorFactory[n];
    List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
    IOperatorSchema[] localInputSchemas = new IOperatorSchema[1];
    localInputSchemas[0] = new OperatorSchemaImpl();
    // Collect the variables read by the merge expressions, in expression order.
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression aggFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        aggFun.getUsedVariables(usedVars);
    }
    // Pair the i-th used variable with the i-th intermediate type.
    i = 0;
    for (Object type : intermediateTypes) {
        aggOpInputEnv.setVarType(usedVars.get(i++), type);
    }
    // The local schema lists key/decor variables followed by the merge input variables.
    for (LogicalVariable keyVar : keyAndDecVariables) {
        localInputSchemas[0].addVariable(keyVar);
    }
    for (LogicalVariable usedVar : usedVars) {
        localInputSchemas[0].addVariable(usedVar);
    }
    for (i = 0; i < n; i++) {
        AggregateFunctionCallExpression mergeFun = (AggregateFunctionCallExpression) aggOp.getMergeExpressions().get(i).getValue();
        merges[i] = expressionRuntimeProvider.createSerializableAggregateFunctionFactory(mergeFun, aggOpInputEnv, localInputSchemas, context);
    }
    IAggregatorDescriptorFactory aggregatorFactory = new SerializableAggregatorDescriptorFactory(aff);
    IAggregatorDescriptorFactory mergeFactory = new SerializableAggregatorDescriptorFactory(merges);
    INormalizedKeyComputerFactory normalizedKeyFactory = JobGenHelper.variablesToAscNormalizedKeyComputerFactory(gbyCols, aggOpInputEnv, context);
    // Calculates the hash table size (# of unique hash values) based on the budget and a tuple size.
    int memoryBudgetInBytes = context.getFrameSize() * frameLimit;
    int groupByColumnsCount = gby.getGroupByList().size() + numFds;
    int hashTableSize = ExternalGroupOperatorDescriptor.calculateGroupByTableCardinality(memoryBudgetInBytes, groupByColumnsCount, context.getFrameSize());
    ExternalGroupOperatorDescriptor gbyOpDesc = new ExternalGroupOperatorDescriptor(spec, hashTableSize, inputSize, keyAndDecFields, frameLimit, comparatorFactories, normalizedKeyFactory, aggregatorFactory, mergeFactory, recordDescriptor, recordDescriptor, new HashSpillableTableFactory(hashFunctionFactories));
    contributeOpDesc(builder, gby, gbyOpDesc);
    ILogicalOperator src = op.getInputs().get(0).getValue();
    builder.contributeGraphEdge(src, 0, op, 0);
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) ArrayList(java.util.ArrayList) ISerializedAggregateEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.ISerializedAggregateEvaluatorFactory) IExpressionRuntimeProvider(org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IPartialAggregationTypeComputer(org.apache.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer) IBinaryHashFunctionFamily(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) SerializableAggregatorDescriptorFactory(org.apache.hyracks.algebricks.runtime.operators.aggreg.SerializableAggregatorDescriptorFactory) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IOperatorDescriptorRegistry(org.apache.hyracks.api.job.IOperatorDescriptorRegistry) OperatorSchemaImpl(org.apache.hyracks.algebricks.core.jobgen.impl.OperatorSchemaImpl) IAggregatorDescriptorFactory(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory) Mutable(org.apache.commons.lang3.mutable.Mutable) 
ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) INormalizedKeyComputerFactory(org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)

Aggregations

AggregateOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator)40 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)28 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)28 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)23 Mutable (org.apache.commons.lang3.mutable.Mutable)21 ILogicalPlan (org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan)21 AbstractLogicalOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator)20 ArrayList (java.util.ArrayList)19 VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression)17 GroupByOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator)14 AggregateFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression)13 AbstractFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression)10 AssignOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator)10 NestedTupleSourceOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator)10 UnnestOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator)10 MutableObject (org.apache.commons.lang3.mutable.MutableObject)9 Pair (org.apache.hyracks.algebricks.common.utils.Pair)9 SubplanOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator)7 HashSet (java.util.HashSet)6 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)6