use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class PreclusteredGroupByPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
int[] keys = JobGenHelper.variablesToFieldIndexes(columnList, inputSchemas[0]);
GroupByOperator gby = (GroupByOperator) op;
int[] fdColumns = getFdColumns(gby, inputSchemas[0]);
// compile subplans and set the gby op. schema accordingly
AlgebricksPipeline[] subplans = compileSubplans(inputSchemas[0], gby, opSchema, context);
IAggregatorDescriptorFactory aggregatorFactory;
if (gby.getNestedPlans().get(0).getRoots().get(0).getValue().getOperatorTag() == LogicalOperatorTag.RUNNINGAGGREGATE) {
aggregatorFactory = new NestedPlansRunningAggregatorFactory(subplans, keys, fdColumns);
} else {
aggregatorFactory = new NestedPlansAccumulatingAggregatorFactory(subplans, keys, fdColumns);
}
IOperatorDescriptorRegistry spec = builder.getJobSpec();
IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(columnList, context.getTypeEnvironment(op), context);
RecordDescriptor recordDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
PreclusteredGroupOperatorDescriptor opDesc = new PreclusteredGroupOperatorDescriptor(spec, keys, comparatorFactories, aggregatorFactory, recordDescriptor, groupAll);
contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
ILogicalOperator src = op.getInputs().get(0).getValue();
builder.contributeGraphEdge(src, 0, op, 0);
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class SortGroupByPOperator method computeDeliveredProperties.
@Override
public void computeDeliveredProperties(ILogicalOperator op, IOptimizationContext context) {
List<ILocalStructuralProperty> propsLocal = new LinkedList<ILocalStructuralProperty>();
GroupByOperator gOp = (GroupByOperator) op;
Set<LogicalVariable> columnSet = new ListSet<LogicalVariable>();
List<OrderColumn> ocs = new ArrayList<OrderColumn>();
if (!columnSet.isEmpty()) {
propsLocal.add(new LocalGroupingProperty(columnSet));
}
for (OrderColumn oc : orderColumns) {
ocs.add(oc);
}
propsLocal.add(new LocalOrderProperty(ocs));
for (ILogicalPlan p : gOp.getNestedPlans()) {
for (Mutable<ILogicalOperator> r : p.getRoots()) {
ILogicalOperator rOp = r.getValue();
propsLocal.addAll(rOp.getDeliveredPhysicalProperties().getLocalProperties());
}
}
ILogicalOperator op2 = op.getInputs().get(0).getValue();
IPhysicalPropertiesVector childProp = op2.getDeliveredPhysicalProperties();
deliveredProperties = new StructuralPropertiesVector(childProp.getPartitioningProperty(), propsLocal);
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class InlineAllNtsInSubplanVisitor method wrapLimitInGroupBy.
private Pair<ILogicalOperator, LogicalVariable> wrapLimitInGroupBy(ILogicalOperator op, LogicalVariable recordVar, Set<LogicalVariable> inputLiveVars) throws AlgebricksException {
GroupByOperator gbyOp = new GroupByOperator();
List<Pair<LogicalVariable, LogicalVariable>> keyVarNewVarPairs = new ArrayList<>();
for (LogicalVariable keyVar : correlatedKeyVars) {
// This limits the visitor can only be applied to a nested logical
// plan inside a Subplan operator,
// where the keyVarsToEnforce forms a candidate key which can
// uniquely identify a tuple out of the nested-tuple-source.
LogicalVariable newVar = context.newVar();
gbyOp.getGroupByList().add(new Pair<>(newVar, new MutableObject<>(new VariableReferenceExpression(keyVar))));
keyVarNewVarPairs.add(new Pair<>(keyVar, newVar));
}
// Creates an aggregate operator doing LISTIFY, as the root of the
// nested plan of the added group-by operator.
List<LogicalVariable> aggVarList = new ArrayList<LogicalVariable>();
List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<Mutable<ILogicalExpression>>();
LogicalVariable aggVar = context.newVar();
List<Mutable<ILogicalExpression>> aggArgList = new ArrayList<>();
aggVarList.add(aggVar);
// Creates an aggregation function expression.
aggArgList.add(new MutableObject<>(new VariableReferenceExpression(recordVar)));
ILogicalExpression aggExpr = new AggregateFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.LISTIFY), false, aggArgList);
aggExprList.add(new MutableObject<>(aggExpr));
AggregateOperator aggOp = new AggregateOperator(aggVarList, aggExprList);
// Adds the original limit operator as the input operator to the added
// aggregate operator.
aggOp.getInputs().add(new MutableObject<>(op));
op.getInputs().clear();
ILogicalOperator currentOp = op;
if (!orderingExprs.isEmpty()) {
OrderOperator orderOp = new OrderOperator(cloneOrderingExpression(orderingExprs));
op.getInputs().add(new MutableObject<>(orderOp));
currentOp = orderOp;
}
// Adds a nested tuple source operator as the input operator to the
// limit operator.
NestedTupleSourceOperator nts = new NestedTupleSourceOperator(new MutableObject<ILogicalOperator>(gbyOp));
currentOp.getInputs().add(new MutableObject<>(nts));
// Sets the root of the added nested plan to the aggregate operator.
ILogicalPlan nestedPlan = new ALogicalPlanImpl();
nestedPlan.getRoots().add(new MutableObject<>(aggOp));
// Sets the nested plan for the added group-by operator.
gbyOp.getNestedPlans().add(nestedPlan);
// Updates variable mapping for ancestor operators.
for (Pair<LogicalVariable, LogicalVariable> keyVarNewVar : keyVarNewVarPairs) {
updateInputToOutputVarMapping(keyVarNewVar.first, keyVarNewVar.second, false);
}
return new Pair<>(gbyOp, aggVar);
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
GroupByOperator gby = (GroupByOperator) op;
if (gby.getNestedPlans().size() == 1) {
ILogicalPlan p0 = gby.getNestedPlans().get(0);
if (p0.getRoots().size() == 1) {
Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
AggregateOperator aggOp = (AggregateOperator) r0.getValue();
boolean serializable = true;
for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
serializable = false;
break;
}
}
if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
boolean setToExternalGby = false;
if (serializable) {
// if serializable, use external group-by
// now check whether the serialized version aggregation function has corresponding intermediate agg
boolean hasIntermediateAgg = true;
IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
List<LogicalVariable> originalVariables = aggOp.getVariables();
List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
int aggNum = aggExprs.size();
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
hasIntermediateAgg = false;
break;
}
}
// Check whether there are multiple aggregates in the sub plan.
// Currently, we don't support multiple aggregates in one external group-by.
boolean multipleAggOpsFound = false;
ILogicalOperator r1Logical = aggOp;
while (r1Logical.hasInputs()) {
r1Logical = r1Logical.getInputs().get(0).getValue();
if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
multipleAggOpsFound = true;
break;
}
}
if (hasIntermediateAgg && !multipleAggOpsFound) {
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
aggOp.getExpressions().get(i).setValue(serialAggExpr);
}
ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
generateMergeAggregationExpressions(gby, context);
op.setPhysicalOperator(externalGby);
setToExternalGby = true;
}
}
if (!setToExternalGby) {
// if not serializable or no intermediate agg, use pre-clustered group-by
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
}
}
} else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
} else {
throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
}
}
}
}
if (op.getPhysicalOperator() == null) {
switch(op.getOperatorTag()) {
case INNERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
break;
}
case LEFTOUTERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
break;
}
case UNNEST_MAP:
case LEFT_OUTER_UNNEST_MAP:
{
ILogicalExpression unnestExpr = null;
unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier fid = f.getFunctionIdentifier();
if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
throw new IllegalStateException();
}
AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
jobGenParams.readFromFuncArgs(f.getArguments());
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
if (dsi == null) {
throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
}
IndexType indexType = jobGenParams.getIndexType();
boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
switch(indexType) {
case BTREE:
{
BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
btreeJobGenParams.readFromFuncArgs(f.getArguments());
op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
break;
}
case RTREE:
{
op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
break;
}
case SINGLE_PARTITION_WORD_INVIX:
case SINGLE_PARTITION_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
break;
}
case LENGTH_PARTITIONED_WORD_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
break;
}
default:
{
throw new NotImplementedException(indexType + " indexes are not implemented.");
}
}
}
break;
}
}
}
if (op.hasNestedPlans()) {
AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
for (ILogicalPlan p : nested.getNestedPlans()) {
setPhysicalOperators(p, context);
}
}
for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
}
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator in project asterixdb by apache.
the class InlineAllNtsInSubplanVisitor method visitAggregateOperator.
/**
* Wraps an AggregateOperator or RunningAggregateOperator with a group-by
* operator where the group-by keys are variables in keyVarsToEnforce. Note
* that the function here prevents this visitor being used to rewrite
* arbitrary query plans. Instead, it could only be used for rewriting a
* nested plan within a subplan operator.
*
* @param op
* the logical operator for aggregate or running aggregate.
* @return the wrapped group-by operator if {@code keyVarsToEnforce} is not
* empty, and {@code op} otherwise.
* @throws AlgebricksException
*/
private ILogicalOperator visitAggregateOperator(ILogicalOperator op) throws AlgebricksException {
visitSingleInputOperator(op);
if (correlatedKeyVars.isEmpty()) {
return op;
}
GroupByOperator gbyOp = new GroupByOperator();
// Creates a copy of correlatedKeyVars, to fix the ConcurrentModificationExcetpion in ASTERIXDB-1581.
List<LogicalVariable> copyOfCorrelatedKeyVars = new ArrayList<>(correlatedKeyVars);
for (LogicalVariable keyVar : copyOfCorrelatedKeyVars) {
// This limits the visitor can only be applied to a nested logical
// plan inside a Subplan operator,
// where the keyVarsToEnforce forms a candidate key which can
// uniquely identify a tuple out of the nested-tuple-source.
LogicalVariable newVar = context.newVar();
gbyOp.getGroupByList().add(new Pair<>(newVar, new MutableObject<>(new VariableReferenceExpression(keyVar))));
updateInputToOutputVarMapping(keyVar, newVar, false);
}
ILogicalOperator inputOp = op.getInputs().get(0).getValue();
gbyOp.getInputs().add(new MutableObject<>(inputOp));
NestedTupleSourceOperator nts = new NestedTupleSourceOperator(new MutableObject<ILogicalOperator>(gbyOp));
op.getInputs().clear();
op.getInputs().add(new MutableObject<>(nts));
ILogicalPlan nestedPlan = new ALogicalPlanImpl();
nestedPlan.getRoots().add(new MutableObject<>(op));
gbyOp.getNestedPlans().add(nestedPlan);
OperatorManipulationUtil.computeTypeEnvironmentBottomUp(gbyOp, context);
return op;
}
Aggregations