use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class FuzzyEqRule method expandFuzzyEq.
private boolean expandFuzzyEq(Mutable<ILogicalExpression> expRef, IOptimizationContext context, IVariableTypeEnvironment env, MetadataProvider metadataProvider) throws AlgebricksException {
ILogicalExpression exp = expRef.getValue();
if (exp.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
return false;
}
boolean expanded = false;
AbstractFunctionCallExpression funcExp = (AbstractFunctionCallExpression) exp;
FunctionIdentifier fi = funcExp.getFunctionIdentifier();
if (fi.equals(BuiltinFunctions.FUZZY_EQ)) {
List<Mutable<ILogicalExpression>> inputExps = funcExp.getArguments();
String simFuncName = FuzzyUtils.getSimFunction(metadataProvider);
ArrayList<Mutable<ILogicalExpression>> similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
for (int i = 0; i < inputExps.size(); ++i) {
Mutable<ILogicalExpression> inputExpRef = inputExps.get(i);
similarityArgs.add(inputExpRef);
}
FunctionIdentifier simFunctionIdentifier = FuzzyUtils.getFunctionIdentifier(simFuncName);
ScalarFunctionCallExpression similarityExp = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(simFunctionIdentifier), similarityArgs);
// Add annotations from the original fuzzy-eq function.
similarityExp.getAnnotations().putAll(funcExp.getAnnotations());
ArrayList<Mutable<ILogicalExpression>> cmpArgs = new ArrayList<Mutable<ILogicalExpression>>();
cmpArgs.add(new MutableObject<ILogicalExpression>(similarityExp));
IAObject simThreshold = FuzzyUtils.getSimThreshold(metadataProvider, simFuncName);
cmpArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(simThreshold))));
ScalarFunctionCallExpression cmpExpr = FuzzyUtils.getComparisonExpr(simFuncName, cmpArgs);
expRef.setValue(cmpExpr);
return true;
} else if (fi.equals(AlgebricksBuiltinFunctions.AND) || fi.equals(AlgebricksBuiltinFunctions.OR)) {
for (int i = 0; i < 2; i++) {
if (expandFuzzyEq(funcExp.getArguments().get(i), context, env, metadataProvider)) {
expanded = true;
}
}
}
return expanded;
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class FuzzyJoinRule method rewritePost.
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
// current opperator is join
if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
return false;
}
// Find GET_ITEM function.
AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op;
Mutable<ILogicalExpression> expRef = joinOp.getCondition();
Mutable<ILogicalExpression> getItemExprRef = getSimilarityExpression(expRef);
if (getItemExprRef == null) {
return false;
}
// Check if the GET_ITEM function is on one of the supported similarity-check functions.
AbstractFunctionCallExpression getItemFuncExpr = (AbstractFunctionCallExpression) getItemExprRef.getValue();
Mutable<ILogicalExpression> argRef = getItemFuncExpr.getArguments().get(0);
AbstractFunctionCallExpression simFuncExpr = (AbstractFunctionCallExpression) argRef.getValue();
if (!simFuncs.contains(simFuncExpr.getFunctionIdentifier())) {
return false;
}
// Skip this rule based on annotations.
if (simFuncExpr.getAnnotations().containsKey(IndexedNLJoinExpressionAnnotation.INSTANCE)) {
return false;
}
List<Mutable<ILogicalOperator>> inputOps = joinOp.getInputs();
ILogicalOperator leftInputOp = inputOps.get(0).getValue();
ILogicalOperator rightInputOp = inputOps.get(1).getValue();
List<Mutable<ILogicalExpression>> inputExps = simFuncExpr.getArguments();
ILogicalExpression inputExp0 = inputExps.get(0).getValue();
ILogicalExpression inputExp1 = inputExps.get(1).getValue();
// left and right expressions are variables
if (inputExp0.getExpressionTag() != LogicalExpressionTag.VARIABLE || inputExp1.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
return false;
}
LogicalVariable inputVar0 = ((VariableReferenceExpression) inputExp0).getVariableReference();
LogicalVariable inputVar1 = ((VariableReferenceExpression) inputExp1).getVariableReference();
LogicalVariable leftInputVar;
LogicalVariable rightInputVar;
liveVars.clear();
VariableUtilities.getLiveVariables(leftInputOp, liveVars);
if (liveVars.contains(inputVar0)) {
leftInputVar = inputVar0;
rightInputVar = inputVar1;
} else {
leftInputVar = inputVar1;
rightInputVar = inputVar0;
}
List<LogicalVariable> leftInputPKs = context.findPrimaryKey(leftInputVar);
List<LogicalVariable> rightInputPKs = context.findPrimaryKey(rightInputVar);
// Bail if primary keys could not be inferred.
if (leftInputPKs == null || rightInputPKs == null) {
return false;
}
// primary key has only one variable
if (leftInputPKs.size() != 1 || rightInputPKs.size() != 1) {
return false;
}
IAType leftType = (IAType) context.getOutputTypeEnvironment(leftInputOp).getVarType(leftInputVar);
IAType rightType = (IAType) context.getOutputTypeEnvironment(rightInputOp).getVarType(rightInputVar);
// left-hand side and right-hand side of "~=" has the same type
IAType left2 = TypeComputeUtils.getActualType(leftType);
IAType right2 = TypeComputeUtils.getActualType(rightType);
if (!left2.deepEqual(right2)) {
return false;
}
//
// -- - FIRE - --
//
MetadataProvider metadataProvider = ((MetadataProvider) context.getMetadataProvider());
FunctionIdentifier funcId = FuzzyUtils.getTokenizer(leftType.getTypeTag());
String tokenizer;
if (funcId == null) {
tokenizer = "";
} else {
tokenizer = funcId.getName();
}
float simThreshold = FuzzyUtils.getSimThreshold(metadataProvider);
String simFunction = FuzzyUtils.getSimFunction(metadataProvider);
// finalize AQL+ query
String prepareJoin;
switch(joinOp.getJoinKind()) {
case INNER:
{
prepareJoin = "join" + AQLPLUS;
break;
}
case LEFT_OUTER:
{
// other sort of bug.
return false;
// prepareJoin = "loj" + AQLPLUS;
// break;
}
default:
{
throw new IllegalStateException();
}
}
String aqlPlus = String.format(Locale.US, prepareJoin, tokenizer, tokenizer, simFunction, simThreshold, tokenizer, tokenizer, simFunction, simThreshold, simFunction, simThreshold, simThreshold);
LogicalVariable leftPKVar = leftInputPKs.get(0);
LogicalVariable rightPKVar = rightInputPKs.get(0);
Counter counter = new Counter(context.getVarCounter());
AQLPlusParser parser = new AQLPlusParser(new StringReader(aqlPlus));
parser.initScope();
parser.setVarCounter(counter);
List<Clause> clauses;
try {
clauses = parser.Clauses();
} catch (ParseException e) {
throw new AlgebricksException(e);
}
// The translator will compile metadata internally. Run this compilation
// under the same transaction id as the "outer" compilation.
AqlPlusExpressionToPlanTranslator translator = new AqlPlusExpressionToPlanTranslator(metadataProvider, counter);
context.setVarCounter(counter.get());
LogicalOperatorDeepCopyWithNewVariablesVisitor deepCopyVisitor = new LogicalOperatorDeepCopyWithNewVariablesVisitor(context, context);
translator.addOperatorToMetaScope(new Identifier("#LEFT"), leftInputOp);
translator.addVariableToMetaScope(new Identifier("$$LEFT"), leftInputVar);
translator.addVariableToMetaScope(new Identifier("$$LEFTPK"), leftPKVar);
translator.addOperatorToMetaScope(new Identifier("#RIGHT"), rightInputOp);
translator.addVariableToMetaScope(new Identifier("$$RIGHT"), rightInputVar);
translator.addVariableToMetaScope(new Identifier("$$RIGHTPK"), rightPKVar);
translator.addOperatorToMetaScope(new Identifier("#LEFT_1"), deepCopyVisitor.deepCopy(leftInputOp));
translator.addVariableToMetaScope(new Identifier("$$LEFT_1"), deepCopyVisitor.varCopy(leftInputVar));
translator.addVariableToMetaScope(new Identifier("$$LEFTPK_1"), deepCopyVisitor.varCopy(leftPKVar));
deepCopyVisitor.updatePrimaryKeys(context);
deepCopyVisitor.reset();
// translator.addOperatorToMetaScope(new Identifier("#LEFT_2"),
// deepCopyVisitor.deepCopy(leftInputOp, null));
// translator.addVariableToMetaScope(new Identifier("$$LEFT_2"),
// deepCopyVisitor.varCopy(leftInputVar));
// translator.addVariableToMetaScope(new Identifier("$$LEFTPK_2"),
// deepCopyVisitor.varCopy(leftPKVar));
// deepCopyVisitor.updatePrimaryKeys(context);
// deepCopyVisitor.reset();
//
// translator.addOperatorToMetaScope(new Identifier("#LEFT_3"),
// deepCopyVisitor.deepCopy(leftInputOp, null));
// translator.addVariableToMetaScope(new Identifier("$$LEFT_3"),
// deepCopyVisitor.varCopy(leftInputVar));
// translator.addVariableToMetaScope(new Identifier("$$LEFTPK_3"),
// deepCopyVisitor.varCopy(leftPKVar));
// deepCopyVisitor.updatePrimaryKeys(context);
// deepCopyVisitor.reset();
translator.addOperatorToMetaScope(new Identifier("#RIGHT_1"), deepCopyVisitor.deepCopy(rightInputOp));
translator.addVariableToMetaScope(new Identifier("$$RIGHT_1"), deepCopyVisitor.varCopy(rightInputVar));
translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_1"), deepCopyVisitor.varCopy(rightPKVar));
deepCopyVisitor.updatePrimaryKeys(context);
deepCopyVisitor.reset();
// TODO pick side to run Stage 1, currently always picks RIGHT side
translator.addOperatorToMetaScope(new Identifier("#RIGHT_2"), deepCopyVisitor.deepCopy(rightInputOp));
translator.addVariableToMetaScope(new Identifier("$$RIGHT_2"), deepCopyVisitor.varCopy(rightInputVar));
translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_2"), deepCopyVisitor.varCopy(rightPKVar));
deepCopyVisitor.updatePrimaryKeys(context);
deepCopyVisitor.reset();
translator.addOperatorToMetaScope(new Identifier("#RIGHT_3"), deepCopyVisitor.deepCopy(rightInputOp));
translator.addVariableToMetaScope(new Identifier("$$RIGHT_3"), deepCopyVisitor.varCopy(rightInputVar));
translator.addVariableToMetaScope(new Identifier("$$RIGHTPK_3"), deepCopyVisitor.varCopy(rightPKVar));
deepCopyVisitor.updatePrimaryKeys(context);
deepCopyVisitor.reset();
ILogicalPlan plan;
try {
plan = translator.translate(clauses);
} catch (AsterixException e) {
throw new AlgebricksException(e);
}
context.setVarCounter(counter.get());
ILogicalOperator outputOp = plan.getRoots().get(0).getValue();
SelectOperator extraSelect = null;
if (getItemExprRef != expRef) {
// more than one join condition
getItemExprRef.setValue(ConstantExpression.TRUE);
switch(joinOp.getJoinKind()) {
case INNER:
{
extraSelect = new SelectOperator(expRef, false, null);
extraSelect.getInputs().add(new MutableObject<ILogicalOperator>(outputOp));
outputOp = extraSelect;
break;
}
case LEFT_OUTER:
{
if (((AbstractLogicalOperator) outputOp).getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
throw new IllegalStateException();
}
LeftOuterJoinOperator topJoin = (LeftOuterJoinOperator) outputOp;
topJoin.getCondition().setValue(expRef.getValue());
break;
}
default:
{
throw new IllegalStateException();
}
}
}
opRef.setValue(outputOp);
OperatorPropertiesUtil.typeOpRec(opRef, context);
return true;
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class RTreeSearchPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
AbstractUnnestMapOperator unnestMap = (AbstractUnnestMapOperator) op;
ILogicalExpression unnestExpr = unnestMap.getExpressionRef().getValue();
if (unnestExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
throw new IllegalStateException();
}
AbstractFunctionCallExpression unnestFuncExpr = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier funcIdent = unnestFuncExpr.getFunctionIdentifier();
if (!funcIdent.equals(BuiltinFunctions.INDEX_SEARCH)) {
return;
}
RTreeJobGenParams jobGenParams = new RTreeJobGenParams();
jobGenParams.readFromFuncArgs(unnestFuncExpr.getArguments());
int[] keyIndexes = getKeyIndexes(jobGenParams.getKeyVarList(), inputSchemas);
int[] minFilterFieldIndexes = getKeyIndexes(unnestMap.getMinFilterVars(), inputSchemas);
int[] maxFilterFieldIndexes = getKeyIndexes(unnestMap.getMaxFilterVars(), inputSchemas);
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(unnestMap);
List<LogicalVariable> outputVars = unnestMap.getVariables();
if (jobGenParams.getRetainInput()) {
outputVars = new ArrayList<LogicalVariable>();
VariableUtilities.getLiveVariables(unnestMap, outputVars);
}
boolean retainNull = false;
if (op.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP) {
// By nature, LEFT_OUTER_UNNEST_MAP should generate null values for non-matching tuples.
retainNull = true;
}
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> rtreeSearch = mp.buildRtreeRuntime(builder.getJobSpec(), outputVars, opSchema, typeEnv, context, jobGenParams.getRetainInput(), retainNull, dataset, jobGenParams.getIndexName(), keyIndexes, minFilterFieldIndexes, maxFilterFieldIndexes);
builder.contributeHyracksOperator(unnestMap, rtreeSearch.first);
builder.contributeAlgebricksPartitionConstraint(rtreeSearch.first, rtreeSearch.second);
ILogicalOperator srcExchange = unnestMap.getInputs().get(0).getValue();
builder.contributeGraphEdge(srcExchange, 0, unnestMap, 0);
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class BTreeSearchPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
AbstractUnnestMapOperator unnestMap = (AbstractUnnestMapOperator) op;
ILogicalExpression unnestExpr = unnestMap.getExpressionRef().getValue();
if (unnestExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
throw new IllegalStateException();
}
AbstractFunctionCallExpression unnestFuncExpr = (AbstractFunctionCallExpression) unnestExpr;
FunctionIdentifier funcIdent = unnestFuncExpr.getFunctionIdentifier();
if (!funcIdent.equals(BuiltinFunctions.INDEX_SEARCH)) {
return;
}
BTreeJobGenParams jobGenParams = new BTreeJobGenParams();
jobGenParams.readFromFuncArgs(unnestFuncExpr.getArguments());
int[] lowKeyIndexes = getKeyIndexes(jobGenParams.getLowKeyVarList(), inputSchemas);
int[] highKeyIndexes = getKeyIndexes(jobGenParams.getHighKeyVarList(), inputSchemas);
int[] minFilterFieldIndexes = getKeyIndexes(unnestMap.getMinFilterVars(), inputSchemas);
int[] maxFilterFieldIndexes = getKeyIndexes(unnestMap.getMaxFilterVars(), inputSchemas);
MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
Dataset dataset = metadataProvider.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(op);
// By nature, LEFT_OUTER_UNNEST_MAP should generate null values for non-matching tuples.
boolean retainMissing = op.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP;
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> btreeSearch = metadataProvider.buildBtreeRuntime(builder.getJobSpec(), opSchema, typeEnv, context, jobGenParams.getRetainInput(), retainMissing, dataset, jobGenParams.getIndexName(), lowKeyIndexes, highKeyIndexes, jobGenParams.isLowKeyInclusive(), jobGenParams.isHighKeyInclusive(), minFilterFieldIndexes, maxFilterFieldIndexes);
builder.contributeHyracksOperator(unnestMap, btreeSearch.first);
builder.contributeAlgebricksPartitionConstraint(btreeSearch.first, btreeSearch.second);
ILogicalOperator srcExchange = unnestMap.getInputs().get(0).getValue();
builder.contributeGraphEdge(srcExchange, 0, unnestMap, 0);
}
use of org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier in project asterixdb by apache.
the class ExternalDataLookupPOperator method contributeRuntimeOperator.
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema opSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
UnnestMapOperator unnestMap = (UnnestMapOperator) op;
ILogicalExpression expr = unnestMap.getExpressionRef().getValue();
if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
throw new IllegalStateException();
}
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
if (!funcIdent.equals(BuiltinFunctions.EXTERNAL_LOOKUP)) {
return;
}
int[] ridIndexes = getKeyIndexes(ridVarList, inputSchemas);
IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(op);
MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> externalLoopup = metadataProvider.buildExternalDataLookupRuntime(builder.getJobSpec(), dataset, ridIndexes, retainInput, typeEnv, opSchema, context, metadataProvider, retainMissing);
builder.contributeHyracksOperator(unnestMap, externalLoopup.first);
builder.contributeAlgebricksPartitionConstraint(externalLoopup.first, externalLoopup.second);
ILogicalOperator srcExchange = unnestMap.getInputs().get(0).getValue();
builder.contributeGraphEdge(srcExchange, 0, unnestMap, 0);
}
Aggregations