use of org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator in project asterixdb by apache.
the class RemoveLeftOuterUnnestForLeftOuterJoinRule method rewritePre.
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
ILogicalOperator op1 = opRef.getValue();
// Checks the plan pattern.
if (!checkOperatorPattern(op1)) {
return false;
}
LeftOuterUnnestOperator outerUnnest = (LeftOuterUnnestOperator) op1;
GroupByOperator gbyOperator = (GroupByOperator) outerUnnest.getInputs().get(0).getValue();
LeftOuterJoinOperator lojOperator = (LeftOuterJoinOperator) gbyOperator.getInputs().get(0).getValue();
// Checks whether the left outer unnest and the group-by operator are qualified for rewriting.
Triple<Boolean, ILogicalExpression, ILogicalExpression> checkGbyResult = checkUnnestAndGby(outerUnnest, gbyOperator);
// The argument for listify and not(is-missing(...)) check should be variables.
if (!isVariableReference(checkGbyResult.second) || !isVariableReference(checkGbyResult.third)) {
return false;
}
// Checks whether both the listify variable and the condition test variable are from the right input
// branch of the left outer join.
LogicalVariable listifyVar = ((VariableReferenceExpression) checkGbyResult.second).getVariableReference();
LogicalVariable conditionTestVar = ((VariableReferenceExpression) checkGbyResult.third).getVariableReference();
if (!checkListifyAndConditionVar(lojOperator, listifyVar, conditionTestVar)) {
return false;
}
// Does the rewrite.
removeGroupByAndOuterUnnest(opRef, context, outerUnnest, gbyOperator, lojOperator, listifyVar);
return true;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator in project asterixdb by apache.
the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
GroupByOperator gby = (GroupByOperator) op;
if (gby.getNestedPlans().size() == 1) {
ILogicalPlan p0 = gby.getNestedPlans().get(0);
if (p0.getRoots().size() == 1) {
Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
AggregateOperator aggOp = (AggregateOperator) r0.getValue();
boolean serializable = true;
for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
serializable = false;
break;
}
}
if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
boolean setToExternalGby = false;
if (serializable) {
// if serializable, use external group-by
// now check whether the serialized version aggregation function has corresponding intermediate agg
boolean hasIntermediateAgg = true;
IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
List<LogicalVariable> originalVariables = aggOp.getVariables();
List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
int aggNum = aggExprs.size();
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
hasIntermediateAgg = false;
break;
}
}
// Check whether there are multiple aggregates in the sub plan.
// Currently, we don't support multiple aggregates in one external group-by.
boolean multipleAggOpsFound = false;
ILogicalOperator r1Logical = aggOp;
while (r1Logical.hasInputs()) {
r1Logical = r1Logical.getInputs().get(0).getValue();
if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
multipleAggOpsFound = true;
break;
}
}
if (hasIntermediateAgg && !multipleAggOpsFound) {
for (int i = 0; i < aggNum; i++) {
AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
aggOp.getExpressions().get(i).setValue(serialAggExpr);
}
ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
generateMergeAggregationExpressions(gby, context);
op.setPhysicalOperator(externalGby);
setToExternalGby = true;
}
}
if (!setToExternalGby) {
// if not serializable or no intermediate agg, use pre-clustered group-by
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
}
}
} else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
ILogicalExpression expr = p.second.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
columnList.add(varRef.getVariableReference());
}
}
op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
} else {
throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
}
}
}
}
if (op.getPhysicalOperator() == null) {
switch(op.getOperatorTag()) {
case INNERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
break;
}
case LEFTOUTERJOIN:
{
JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
break;
}
case UNNEST_MAP:
case LEFT_OUTER_UNNEST_MAP:
{
ILogicalExpression unnestExpr = null;
unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier fid = f.getFunctionIdentifier();
if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
throw new IllegalStateException();
}
AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
jobGenParams.readFromFuncArgs(f.getArguments());
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
if (dsi == null) {
throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
}
IndexType indexType = jobGenParams.getIndexType();
boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
switch(indexType) {
case BTREE:
{
BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
btreeJobGenParams.readFromFuncArgs(f.getArguments());
op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
break;
}
case RTREE:
{
op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
break;
}
case SINGLE_PARTITION_WORD_INVIX:
case SINGLE_PARTITION_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
break;
}
case LENGTH_PARTITIONED_WORD_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX:
{
op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
break;
}
default:
{
throw new NotImplementedException(indexType + " indexes are not implemented.");
}
}
}
break;
}
}
}
if (op.hasNestedPlans()) {
AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
for (ILogicalPlan p : nested.getNestedPlans()) {
setPhysicalOperators(p, context);
}
}
for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
}
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator in project asterixdb by apache.
the class IntroduceJoinAccessMethodRule method checkAndApplyJoinTransformation.
/**
* Recursively traverse the given plan and check whether a INNERJOIN or LEFTOUTERJOIN operator exists.
* If one is found, maintain the path from the root to the given join operator and
* optimize the path from the given join operator to the EMPTY_TUPLE_SOURCE operator
* if it is not already optimized.
*/
protected boolean checkAndApplyJoinTransformation(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
boolean joinFoundAndOptimizationApplied;
// Check the current operator pattern to see whether it is a JOIN or not.
boolean isThisOpInnerJoin = isInnerJoin(op);
boolean isThisOpLeftOuterJoin = isLeftOuterJoin(op);
boolean isParentOpGroupBy = hasGroupBy;
Mutable<ILogicalOperator> joinRefFromThisOp = null;
AbstractBinaryJoinOperator joinOpFromThisOp = null;
if (isThisOpInnerJoin) {
// Set join operator.
joinRef = opRef;
joinOp = (InnerJoinOperator) op;
joinRefFromThisOp = opRef;
joinOpFromThisOp = (InnerJoinOperator) op;
} else if (isThisOpLeftOuterJoin) {
// Set left-outer-join op.
// The current operator is GROUP and the child of this op is LEFTOUERJOIN.
joinRef = op.getInputs().get(0);
joinOp = (LeftOuterJoinOperator) joinRef.getValue();
joinRefFromThisOp = op.getInputs().get(0);
joinOpFromThisOp = (LeftOuterJoinOperator) joinRefFromThisOp.getValue();
}
// to make sure an earlier join in the path is optimized first.
for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
joinFoundAndOptimizationApplied = checkAndApplyJoinTransformation(inputOpRef, context);
if (joinFoundAndOptimizationApplied) {
return true;
}
}
// For a JOIN case, try to transform the given plan.
if (isThisOpInnerJoin || isThisOpLeftOuterJoin) {
// Restore the information from this operator since it might have been be set to null
// if there are other join operators in the earlier path.
joinRef = joinRefFromThisOp;
joinOp = joinOpFromThisOp;
boolean continueCheck = true;
// Already checked? If not, this operator may be optimized.
if (context.checkIfInDontApplySet(this, joinOp)) {
continueCheck = false;
}
// For each access method, this contains the information about
// whether an available index can be applicable or not.
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = null;
if (continueCheck) {
analyzedAMs = new HashMap<>();
}
// whether the given plan is truly optimizable or not.
if (continueCheck && !checkJoinOpConditionAndInitSubTree(context)) {
continueCheck = false;
}
// Analyze the condition of SELECT operator and initialize analyzedAMs.
// Check whether the function in the SELECT operator can be truly transformed.
boolean matchInLeftSubTree = false;
boolean matchInRightSubTree = false;
if (continueCheck) {
if (leftSubTree.hasDataSource()) {
matchInLeftSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond, leftSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
}
if (rightSubTree.hasDataSource()) {
matchInRightSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond, rightSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
}
}
// Find the dataset from the data-source and the record type of the dataset from the metadata.
// This will be used to find an applicable index on the dataset.
boolean checkLeftSubTreeMetadata = false;
boolean checkRightSubTreeMetadata = false;
if (continueCheck && (matchInLeftSubTree || matchInRightSubTree)) {
// Set dataset and type metadata.
if (matchInLeftSubTree) {
checkLeftSubTreeMetadata = leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
}
if (matchInRightSubTree) {
checkRightSubTreeMetadata = rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
}
}
if (continueCheck && (checkLeftSubTreeMetadata || checkRightSubTreeMetadata)) {
// Then find the applicable indexes for the variables used in the JOIN condition.
if (checkLeftSubTreeMetadata) {
fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context);
}
if (checkRightSubTreeMetadata) {
fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context);
}
// Prune the access methods based on the function expression and access methods.
pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
// If the right subtree (inner branch) has indexes, one of those indexes will be used.
// Remove the indexes from the outer branch in the optimizer's consideration list for this rule.
pruneIndexCandidatesFromOuterBranch(analyzedAMs);
// We are going to use indexes from the inner branch.
// If no index is available, then we stop here.
Pair<IAccessMethod, Index> chosenIndex = chooseBestIndex(analyzedAMs);
if (chosenIndex == null) {
context.addToDontApplySet(this, joinOp);
continueCheck = false;
}
if (continueCheck) {
// Apply plan transformation using chosen index.
AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first);
// in GroupByOp.
if (isThisOpLeftOuterJoin && isParentOpGroupBy) {
analysisCtx.setLOJGroupbyOpRef(opRef);
ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils.findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue());
analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
}
Dataset indexDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex.second);
// from the right subtree. The following is just a sanity check.
if (!rightSubTree.hasDataSourceScan() && !indexDataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName())) {
return false;
}
// Finally, try to apply plan transformation using chosen index.
boolean res = chosenIndex.first.applyJoinPlanTransformation(joinRef, leftSubTree, rightSubTree, chosenIndex.second, analysisCtx, context, isThisOpLeftOuterJoin, isParentOpGroupBy);
// will find them.
if (res) {
return res;
}
}
}
joinRef = null;
joinOp = null;
}
return false;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator in project asterixdb by apache.
the class IsomorphismOperatorVisitor method visitLeftOuterJoinOperator.
@Override
public Boolean visitLeftOuterJoinOperator(LeftOuterJoinOperator op, ILogicalOperator arg) throws AlgebricksException {
AbstractLogicalOperator aop = (AbstractLogicalOperator) arg;
if (aop.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
return Boolean.FALSE;
}
LeftOuterJoinOperator joinOpArg = (LeftOuterJoinOperator) copyAndSubstituteVar(op, arg);
boolean isomorphic = op.getCondition().getValue().equals(joinOpArg.getCondition().getValue());
return isomorphic;
}
use of org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator in project asterixdb by apache.
the class IntroduceLeftOuterJoinForSubplanRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
if (op0.getOperatorTag() != LogicalOperatorTag.SUBPLAN) {
return false;
}
SubplanOperator subplan = (SubplanOperator) op0;
Iterator<ILogicalPlan> plansIter = subplan.getNestedPlans().iterator();
ILogicalPlan p = null;
while (plansIter.hasNext()) {
p = plansIter.next();
}
if (p == null) {
return false;
}
if (p.getRoots().size() != 1) {
return false;
}
Mutable<ILogicalOperator> subplanRoot = p.getRoots().get(0);
AbstractLogicalOperator op1 = (AbstractLogicalOperator) subplanRoot.getValue();
Mutable<ILogicalOperator> opUnder = subplan.getInputs().get(0);
if (OperatorPropertiesUtil.isMissingTest((AbstractLogicalOperator) opUnder.getValue())) {
return false;
}
switch(op1.getOperatorTag()) {
case INNERJOIN:
{
InnerJoinOperator join = (InnerJoinOperator) op1;
Mutable<ILogicalOperator> leftRef = join.getInputs().get(0);
Mutable<ILogicalOperator> rightRef = join.getInputs().get(1);
Mutable<ILogicalOperator> ntsRef = getNtsAtEndOfPipeline(leftRef);
if (ntsRef == null) {
ntsRef = getNtsAtEndOfPipeline(rightRef);
if (ntsRef == null) {
return false;
} else {
Mutable<ILogicalOperator> t = leftRef;
leftRef = rightRef;
rightRef = t;
}
}
ntsRef.setValue(opUnder.getValue());
LeftOuterJoinOperator loj = new LeftOuterJoinOperator(join.getCondition());
loj.getInputs().add(leftRef);
loj.getInputs().add(rightRef);
opRef.setValue(loj);
context.computeAndSetTypeEnvironmentForOperator(loj);
return true;
}
case LEFTOUTERJOIN:
{
LeftOuterJoinOperator join = (LeftOuterJoinOperator) op1;
Mutable<ILogicalOperator> leftRef = join.getInputs().get(0);
Mutable<ILogicalOperator> ntsRef = getNtsAtEndOfPipeline(leftRef);
if (ntsRef == null) {
return false;
}
ntsRef.setValue(opUnder.getValue());
opRef.setValue(join);
context.computeAndSetTypeEnvironmentForOperator(join);
return true;
}
default:
{
return false;
}
}
}
Aggregations