use of org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression in project asterixdb by apache.
the class SimilarityCheckRule method getSimilarityCheckExpr.
private ScalarFunctionCallExpression getSimilarityCheckExpr(FunctionIdentifier normFuncIdent, AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) throws AlgebricksException {
// Remember args from original similarity function to add them to the similarity-check function later.
ArrayList<Mutable<ILogicalExpression>> similarityArgs = null;
ScalarFunctionCallExpression simCheckFuncExpr = null;
// Look for jaccard function call, and GE or GT.
if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.SIMILARITY_JACCARD) {
IAObject jaccThresh;
if (normFuncIdent == AlgebricksBuiltinFunctions.GE) {
if (constVal.getObject() instanceof AFloat) {
jaccThresh = constVal.getObject();
} else {
jaccThresh = new AFloat((float) ((ADouble) constVal.getObject()).getDoubleValue());
}
} else if (normFuncIdent == AlgebricksBuiltinFunctions.GT) {
float threshVal = 0.0f;
if (constVal.getObject() instanceof AFloat) {
threshVal = ((AFloat) constVal.getObject()).getFloatValue();
} else {
threshVal = (float) ((ADouble) constVal.getObject()).getDoubleValue();
}
float f = threshVal + Float.MIN_VALUE;
if (f > 1.0f)
f = 1.0f;
jaccThresh = new AFloat(f);
} else {
return null;
}
similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
similarityArgs.addAll(funcExpr.getArguments());
similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(jaccThresh))));
simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.SIMILARITY_JACCARD_CHECK), similarityArgs);
}
// Look for edit-distance function call, and LE or LT.
if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE) {
AInt32 aInt = new AInt32(0);
try {
aInt = (AInt32) ATypeHierarchy.convertNumericTypeObject(constVal.getObject(), ATypeTag.INTEGER);
} catch (HyracksDataException e) {
throw new AlgebricksException(e);
}
AInt32 edThresh;
if (normFuncIdent == AlgebricksBuiltinFunctions.LE) {
edThresh = aInt;
} else if (normFuncIdent == AlgebricksBuiltinFunctions.LT) {
int ed = aInt.getIntegerValue() - 1;
if (ed < 0)
ed = 0;
edThresh = new AInt32(ed);
} else {
return null;
}
similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
similarityArgs.addAll(funcExpr.getArguments());
similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(edThresh))));
simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.EDIT_DISTANCE_CHECK), similarityArgs);
}
// Preserve all annotations.
if (simCheckFuncExpr != null) {
simCheckFuncExpr.getAnnotations().putAll(funcExpr.getAnnotations());
}
return simCheckFuncExpr;
}
use of org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression in project asterixdb by apache.
the class SimilarityCheckRule method replaceSelectConditionExprs.
private boolean replaceSelectConditionExprs(Mutable<ILogicalExpression> expRef, List<AssignOperator> assigns, IOptimizationContext context) throws AlgebricksException {
ILogicalExpression expr = expRef.getValue();
if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
return false;
}
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
// then we may still need the true similarity value even if the GE/GT/LE/LE comparison returns false.
if (funcIdent == AlgebricksBuiltinFunctions.AND) {
boolean found = true;
for (int i = 0; i < funcExpr.getArguments().size(); ++i) {
found = found && replaceSelectConditionExprs(funcExpr.getArguments().get(i), assigns, context);
}
return found;
}
// Look for GE/GT/LE/LT.
if (funcIdent != AlgebricksBuiltinFunctions.GE && funcIdent != AlgebricksBuiltinFunctions.GT && funcIdent != AlgebricksBuiltinFunctions.LE && funcIdent != AlgebricksBuiltinFunctions.LT) {
return false;
}
// One arg should be a function call or a variable, the other a constant.
AsterixConstantValue constVal = null;
ILogicalExpression nonConstExpr = null;
ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
// Normalized GE/GT/LE/LT as if constant was on the right hand side.
FunctionIdentifier normFuncIdent = null;
// One of the args must be a constant.
if (arg1.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
ConstantExpression constExpr = (ConstantExpression) arg1;
constVal = (AsterixConstantValue) constExpr.getValue();
nonConstExpr = arg2;
// Get func ident as if swapping lhs and rhs.
normFuncIdent = getLhsAndRhsSwappedFuncIdent(funcIdent);
} else if (arg2.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
ConstantExpression constExpr = (ConstantExpression) arg2;
constVal = (AsterixConstantValue) constExpr.getValue();
nonConstExpr = arg1;
// Constant is already on rhs, so nothing to be done for normalizedFuncIdent.
normFuncIdent = funcIdent;
} else {
return false;
}
// The other arg is a function call. We can directly replace the select condition with an equivalent similarity check expression.
if (nonConstExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
return replaceWithFunctionCallArg(expRef, normFuncIdent, constVal, (AbstractFunctionCallExpression) nonConstExpr);
}
// The other arg ist a variable. We may have to introduce an assign operator that assigns the result of a similarity-check function to a variable.
if (nonConstExpr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
return replaceWithVariableArg(expRef, normFuncIdent, constVal, (VariableReferenceExpression) nonConstExpr, assigns, context);
}
return false;
}
use of org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression in project asterixdb by apache.
the class SimilarityCheckRule method replaceWithFunctionCallArg.
private boolean replaceWithFunctionCallArg(Mutable<ILogicalExpression> expRef, FunctionIdentifier normFuncIdent, AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) throws AlgebricksException {
// Analyze func expr to see if it is an optimizable similarity function.
ScalarFunctionCallExpression simCheckFuncExpr = getSimilarityCheckExpr(normFuncIdent, constVal, funcExpr);
// Replace the expr in the select condition.
if (simCheckFuncExpr != null) {
// Get item 0 from var.
List<Mutable<ILogicalExpression>> getItemArgs = new ArrayList<Mutable<ILogicalExpression>>();
// First arg is the similarity-check function call.
getItemArgs.add(new MutableObject<ILogicalExpression>(simCheckFuncExpr));
// Second arg is the item index to be accessed.
getItemArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(0)))));
ILogicalExpression getItemExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.GET_ITEM), getItemArgs);
// Replace the old similarity function call with the new getItemExpr.
expRef.setValue(getItemExpr);
return true;
}
return false;
}
use of org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression in project asterixdb by apache.
the class UnnestToDataScanRule method handleFunction.
protected boolean handleFunction(Mutable<ILogicalOperator> opRef, IOptimizationContext context, UnnestOperator unnest, AbstractFunctionCallExpression f) throws AlgebricksException {
FunctionIdentifier fid = f.getFunctionIdentifier();
if (fid.equals(BuiltinFunctions.DATASET)) {
if (unnest.getPositionalVariable() != null) {
// TODO remove this after enabling the support of positional variables in data scan
throw new AlgebricksException("No positional variables are allowed over datasets.");
}
ILogicalExpression expr = f.getArguments().get(0).getValue();
if (expr.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
return false;
}
ConstantExpression ce = (ConstantExpression) expr;
IAlgebricksConstantValue acv = ce.getValue();
if (!(acv instanceof AsterixConstantValue)) {
return false;
}
AsterixConstantValue acv2 = (AsterixConstantValue) acv;
if (acv2.getObject().getType().getTypeTag() != ATypeTag.STRING) {
return false;
}
String datasetArg = ((AString) acv2.getObject()).getStringValue();
MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
Pair<String, String> datasetReference = parseDatasetReference(metadataProvider, datasetArg);
String dataverseName = datasetReference.first;
String datasetName = datasetReference.second;
Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
if (dataset == null) {
throw new AlgebricksException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
}
DataSourceId asid = new DataSourceId(dataverseName, datasetName);
List<LogicalVariable> variables = new ArrayList<>();
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
int numPrimaryKeys = dataset.getPrimaryKeys().size();
for (int i = 0; i < numPrimaryKeys; i++) {
variables.add(context.newVar());
}
}
variables.add(unnest.getVariable());
DataSource dataSource = metadataProvider.findDataSource(asid);
boolean hasMeta = dataSource.hasMeta();
if (hasMeta) {
variables.add(context.newVar());
}
DataSourceScanOperator scan = new DataSourceScanOperator(variables, dataSource);
List<Mutable<ILogicalOperator>> scanInpList = scan.getInputs();
scanInpList.addAll(unnest.getInputs());
opRef.setValue(scan);
addPrimaryKey(variables, dataSource, context);
context.computeAndSetTypeEnvironmentForOperator(scan);
// Adds equivalence classes --- one equivalent class between a primary key
// variable and a record field-access expression.
IAType[] schemaTypes = dataSource.getSchemaTypes();
ARecordType recordType = (ARecordType) (hasMeta ? schemaTypes[schemaTypes.length - 2] : schemaTypes[schemaTypes.length - 1]);
ARecordType metaRecordType = (ARecordType) (hasMeta ? schemaTypes[schemaTypes.length - 1] : null);
EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(scan, variables, recordType, metaRecordType, dataset, context);
return true;
} else if (fid.equals(BuiltinFunctions.FEED_COLLECT)) {
if (unnest.getPositionalVariable() != null) {
throw new AlgebricksException("No positional variables are allowed over feeds.");
}
String dataverse = ConstantExpressionUtil.getStringArgument(f, 0);
String sourceFeedName = ConstantExpressionUtil.getStringArgument(f, 1);
String getTargetFeed = ConstantExpressionUtil.getStringArgument(f, 2);
String subscriptionLocation = ConstantExpressionUtil.getStringArgument(f, 3);
String targetDataset = ConstantExpressionUtil.getStringArgument(f, 4);
String outputType = ConstantExpressionUtil.getStringArgument(f, 5);
MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
DataSourceId asid = new DataSourceId(dataverse, getTargetFeed);
String policyName = metadataProvider.getConfig().get(FeedActivityDetails.FEED_POLICY_NAME);
FeedPolicyEntity policy = metadataProvider.findFeedPolicy(dataverse, policyName);
if (policy == null) {
policy = BuiltinFeedPolicies.getFeedPolicy(policyName);
if (policy == null) {
throw new AlgebricksException("Unknown feed policy:" + policyName);
}
}
ArrayList<LogicalVariable> feedDataScanOutputVariables = new ArrayList<>();
String csLocations = metadataProvider.getConfig().get(FeedActivityDetails.COLLECT_LOCATIONS);
List<LogicalVariable> pkVars = new ArrayList<>();
FeedDataSource ds = createFeedDataSource(asid, targetDataset, sourceFeedName, subscriptionLocation, metadataProvider, policy, outputType, csLocations, unnest.getVariable(), context, pkVars);
// The order for feeds is <Record-Meta-PK>
feedDataScanOutputVariables.add(unnest.getVariable());
// Does it produce meta?
if (ds.hasMeta()) {
feedDataScanOutputVariables.add(context.newVar());
}
// Does it produce pk?
if (ds.isChange()) {
feedDataScanOutputVariables.addAll(pkVars);
}
DataSourceScanOperator scan = new DataSourceScanOperator(feedDataScanOutputVariables, ds);
List<Mutable<ILogicalOperator>> scanInpList = scan.getInputs();
scanInpList.addAll(unnest.getInputs());
opRef.setValue(scan);
context.computeAndSetTypeEnvironmentForOperator(scan);
return true;
}
return false;
}
use of org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression in project asterixdb by apache.
the class AbstractIntroduceAccessMethodRule method pruneIndexCandidates.
/**
* Removes irrelevant access methods candidates, based on whether the
* expressions in the query match those in the index. For example, some
* index may require all its expressions to be matched, and some indexes may
* only require a match on a prefix of fields to be applicable. This methods
* removes all index candidates indexExprs that are definitely not
* applicable according to the expressions involved.
*
* @throws AlgebricksException
*/
public void pruneIndexCandidates(IAccessMethod accessMethod, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, IVariableTypeEnvironment typeEnvironment) throws AlgebricksException {
Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexExprAndVarIt = analysisCtx.getIteratorForIndexExprsAndVars();
// Used to keep track of matched expressions (added for prefix search)
int numMatchedKeys = 0;
ArrayList<Integer> matchedExpressions = new ArrayList<>();
while (indexExprAndVarIt.hasNext()) {
Map.Entry<Index, List<Pair<Integer, Integer>>> indexExprAndVarEntry = indexExprAndVarIt.next();
Index index = indexExprAndVarEntry.getKey();
boolean allUsed = true;
int lastFieldMatched = -1;
boolean foundKeyField = false;
matchedExpressions.clear();
numMatchedKeys = 0;
for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
List<String> keyField = index.getKeyFieldNames().get(i);
final IAType keyType = index.getKeyFieldTypes().get(i);
Iterator<Pair<Integer, Integer>> exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
while (exprsAndVarIter.hasNext()) {
final Pair<Integer, Integer> exprAndVarIdx = exprsAndVarIter.next();
final IOptimizableFuncExpr optFuncExpr = analysisCtx.getMatchedFuncExpr(exprAndVarIdx.first);
// expr and continue.
if (!accessMethod.exprIsOptimizable(index, optFuncExpr)) {
exprsAndVarIter.remove();
continue;
}
boolean typeMatch = true;
//Prune indexes based on field types
List<IAType> matchedTypes = new ArrayList<>();
//retrieve types of expressions joined/selected with an indexed field
for (int j = 0; j < optFuncExpr.getNumLogicalVars(); j++) {
if (j != exprAndVarIdx.second) {
matchedTypes.add(optFuncExpr.getFieldType(j));
}
}
if (matchedTypes.size() < 2 && optFuncExpr.getNumLogicalVars() == 1) {
matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getConstantExpr(0), context.getMetadataProvider(), typeEnvironment));
}
//infer type of logicalExpr based on index keyType
matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getLogicalExpr(exprAndVarIdx.second), null, new IVariableTypeEnvironment() {
@Override
public Object getVarType(LogicalVariable var) throws AlgebricksException {
if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
return keyType;
}
throw new IllegalArgumentException();
}
@Override
public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariables, List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
return keyType;
}
throw new IllegalArgumentException();
}
@Override
public void setVarType(LogicalVariable var, Object type) {
throw new IllegalArgumentException();
}
@Override
public Object getType(ILogicalExpression expr) throws AlgebricksException {
return ExpressionTypeComputer.INSTANCE.getType(expr, null, this);
}
@Override
public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException {
throw new IllegalArgumentException();
}
}));
//for the case when jaccard similarity is measured between ordered & unordered lists
boolean jaccardSimilarity = optFuncExpr.getFuncExpr().getFunctionIdentifier().getName().startsWith("similarity-jaccard-check");
// Full-text search consideration: an (un)ordered list of string type can be compatible with string
// type. i.e. an (un)ordered list can be provided as arguments to a string type field index.
List<IAType> elementTypes = matchedTypes;
if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS || optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS_WO_OPTION) {
for (int j = 0; j < matchedTypes.size(); j++) {
if (matchedTypes.get(j).getTypeTag() == ATypeTag.ARRAY || matchedTypes.get(j).getTypeTag() == ATypeTag.MULTISET) {
elementTypes.set(j, ((AbstractCollectionType) matchedTypes.get(j)).getItemType());
}
}
}
for (int j = 0; j < matchedTypes.size(); j++) {
for (int k = j + 1; k < matchedTypes.size(); k++) {
typeMatch &= isMatched(elementTypes.get(j), elementTypes.get(k), jaccardSimilarity);
}
}
// Check if any field name in the optFuncExpr matches.
if (optFuncExpr.findFieldName(keyField) != -1) {
foundKeyField = typeMatch && optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).hasDataSourceScan();
if (foundKeyField) {
matchedExpressions.add(exprAndVarIdx.first);
numMatchedKeys++;
if (lastFieldMatched == i - 1) {
lastFieldMatched = i;
}
break;
}
}
}
if (!foundKeyField) {
allUsed = false;
// if any expression was matched, remove the non-matched expressions, otherwise the index is unusable
if (lastFieldMatched >= 0) {
exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
while (exprsAndVarIter.hasNext()) {
if (!matchedExpressions.contains(exprsAndVarIter.next().first)) {
exprsAndVarIter.remove();
}
}
}
break;
}
}
// are not, remove this candidate.
if (!allUsed && accessMethod.matchAllIndexExprs()) {
indexExprAndVarIt.remove();
continue;
}
// A prefix of the index exprs may have been matched.
if (accessMethod.matchPrefixIndexExprs()) {
if (lastFieldMatched < 0) {
indexExprAndVarIt.remove();
continue;
}
}
analysisCtx.putNumberOfMatchedKeys(index, Integer.valueOf(numMatchedKeys));
}
}
Aggregations