use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.
the class InvertedIndexPOperator method buildInvertedIndexRuntime.
/**
 * Creates the {@link LSMInvertedIndexSearchOperatorDescriptor} that searches the named
 * secondary index of the given dataset, and pairs it with the partition constraint
 * obtained for that index's file splits.
 *
 * @param similarityThreshold constant holding the similarity threshold; it is cast to
 *        {@link AsterixConstantValue} before anything else happens
 * @param keyFields search key field indexes; only the first entry is used
 * @return the search operator descriptor together with its partition constraint
 * @throws AlgebricksException if the metadata lookup fails or the index named
 *         {@code indexName} cannot be found
 */
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(
        MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec,
        AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput, boolean retainMissing,
        String datasetName, Dataset dataset, String indexName, ATypeTag searchKeyType, int[] keyFields,
        SearchModifierType searchModifierType, IAlgebricksConstantValue similarityThreshold,
        int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery)
        throws AlgebricksException {
    try {
        IAObject thresholdObject = ((AsterixConstantValue) similarityThreshold).getObject();
        int primaryKeyCount = dataset.getPrimaryKeys().size();

        // Resolve the secondary index through the metadata manager; fail fast if it is missing.
        Index invertedIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(),
                dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        if (invertedIndex == null) {
            throw new AlgebricksException(
                    "Code generation error: no index " + indexName + " for dataset " + datasetName);
        }

        // Output record layout is derived from the unnest-map operator's type environment.
        IVariableTypeEnvironment unnestTypeEnv = context.getTypeEnvironment(unnestMap);
        RecordDescriptor outputDescriptor = JobGenHelper.mkRecordDescriptor(unnestTypeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset, indexName);

        // TODO: a single search key field is assumed here.
        int searchKeyField = keyFields[0];

        // Factories for the search modifier and for tokenizing the query.
        IInvertedIndexSearchModifierFactory modifierFactory =
                InvertedIndexAccessMethod.getSearchModifierFactory(searchModifierType, thresholdObject, invertedIndex);
        IBinaryTokenizerFactory tokenizerFactory =
                InvertedIndexAccessMethod.getBinaryTokenizerFactory(searchModifierType, searchKeyType, invertedIndex);
        IIndexDataflowHelperFactory helperFactory = new IndexDataflowHelperFactory(
                metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);

        LSMInvertedIndexSearchOperatorDescriptor searchOperator = new LSMInvertedIndexSearchOperatorDescriptor(
                jobSpec, outputDescriptor, searchKeyField, helperFactory, tokenizerFactory, modifierFactory,
                retainInput, retainMissing, context.getMissingWriterFactory(),
                dataset.getSearchCallbackFactory(metadataProvider.getStorageComponentProvider(), invertedIndex,
                        ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId(),
                        IndexOperation.SEARCH, null),
                minFilterFieldIndexes, maxFilterFieldIndexes, isFullTextSearchQuery, primaryKeyCount, false);
        return new Pair<>(searchOperator, splitsAndConstraint.second);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
}
use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.
the class FuzzyUtils method getSimThreshold.
/**
 * Returns the similarity threshold to use for the given similarity function.
 * <p>
 * The value of the {@code SIM_THRESHOLD_PROP_NAME} property is read from the metadata
 * provider. If it is set, it is parsed as a float for Jaccard and as an int for edit
 * distance; otherwise the function-specific default constant is used.
 * <p>
 * The runtime type of the result depends only on the function: {@link AFloat} for Jaccard
 * and {@link AInt32} for edit distance, regardless of whether the property is set.
 *
 * @param metadata provider from which the threshold property is read
 * @param simFuncName name of the similarity function
 * @return the threshold object, or {@code null} if {@code simFuncName} is neither the
 *         Jaccard nor the edit-distance function name
 * @throws NumberFormatException if the property is set but cannot be parsed
 */
public static IAObject getSimThreshold(MetadataProvider metadata, String simFuncName) {
    String simThresholValue = metadata.getPropertyValue(SIM_THRESHOLD_PROP_NAME);
    IAObject ret = null;
    if (simFuncName.equals(JACCARD_FUNCTION_NAME)) {
        if (simThresholValue != null) {
            float jaccThresh = Float.parseFloat(simThresholValue);
            ret = new AFloat(jaccThresh);
        } else {
            ret = new AFloat(JACCARD_DEFAULT_SIM_THRESHOLD);
        }
    } else if (simFuncName.equals(EDIT_DISTANCE_FUNCTION_NAME)) {
        if (simThresholValue != null) {
            int edThresh = Integer.parseInt(simThresholValue);
            ret = new AInt32(edThresh);
        } else {
            // Use AInt32 here as well, so the default has the same type as an explicitly
            // configured edit-distance threshold (previously the default was an AFloat).
            // The cast keeps this valid whatever numeric type the constant is declared with.
            ret = new AInt32((int) EDIT_DISTANCE_DEFAULT_SIM_THRESHOLD);
        }
    }
    return ret;
}
use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.
the class PushFieldAccessRule method propagateFieldAccessRec.
/**
 * Tries to move the field-access assign operator referenced by {@code opRef} below its
 * input operator, or to replace the access by a data-scan key variable.
 * <p>
 * If the access does not use any variable produced by its input operator, it is pushed
 * below that operator via {@code pushAccessDown}. Otherwise, if the input is a data source
 * scan over an internal dataset and the accessed field is one of the partitioning key
 * fields, the access expression is replaced by a reference to the scan's key variable.
 * In the remaining cases the access is marked via {@code setAsFinal}.
 *
 * @param opRef reference to the assign operator performing the field access
 * @param context optimization context
 * @param finalAnnot annotation passed on to {@code pushAccessDown} / {@code setAsFinal}
 * @return {@code true} if the access was pushed down, merged by
 *         {@code testAndModifyRedundantOp}, or replaced by a key variable; {@code false} otherwise
 * @throws AlgebricksException if the access should be pushed but matches no input of the
 *         operator below, or if the scanned dataset cannot be found
 */
@SuppressWarnings("unchecked")
private boolean propagateFieldAccessRec(Mutable<ILogicalOperator> opRef, IOptimizationContext context, String finalAnnot) throws AlgebricksException {
AssignOperator access = (AssignOperator) opRef.getValue();
Mutable<ILogicalOperator> opRef2 = access.getInputs().get(0);
AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
// Never push through a PROJECT. Also stop when this (access, op2) pair was already compared,
// unless op2 is a SELECT and the access targets an indexed field; presumably that case is
// let through so that the selection can later be rewritten into index search.
if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT || context.checkAndAddToAlreadyCompared(access, op2) && !(op2.getOperatorTag() == LogicalOperatorTag.SELECT && isAccessToIndexedField(access, context))) {
return false;
}
// An explicit IS_MOVABLE=false annotation on op2 blocks the push.
Object annotation = op2.getAnnotations().get(IS_MOVABLE);
if (annotation != null && !((Boolean) annotation)) {
return false;
}
if (tryingToPushThroughSelectionWithSameDataSource(access, op2)) {
return false;
}
// NOTE(review): testAndModifyRedundantOp is not visible here; when it reports a change,
// the recursion continues from op2 and the result of that recursive call is ignored.
if (testAndModifyRedundantOp(access, op2)) {
propagateFieldAccessRec(opRef2, context, finalAnnot);
return true;
}
List<LogicalVariable> usedInAccess = new LinkedList<>();
VariableUtilities.getUsedVariables(access, usedInAccess);
// For a GROUP operator the live variables are considered; for any other operator only
// the variables it produces.
List<LogicalVariable> produced2 = new LinkedList<>();
if (op2.getOperatorTag() == LogicalOperatorTag.GROUP) {
VariableUtilities.getLiveVariables(op2, produced2);
} else {
VariableUtilities.getProducedVariables(op2, produced2);
}
boolean pushItDown = false;
List<LogicalVariable> inter = new ArrayList<>(usedInAccess);
if (inter.isEmpty()) {
// ground value
return false;
}
// inter := variables used by the access that come from op2.
inter.retainAll(produced2);
if (inter.isEmpty()) {
// The access does not depend on op2 at all, so it can go below it.
pushItDown = true;
} else if (op2.getOperatorTag() == LogicalOperatorTag.GROUP) {
GroupByOperator g = (GroupByOperator) op2;
// Collect (decor variable -> underlying variable) pairs for decor entries that are plain
// variable references and are used by the access; those are removed from inter.
List<Pair<LogicalVariable, LogicalVariable>> varMappings = new ArrayList<>();
for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : g.getDecorList()) {
ILogicalExpression e = p.second.getValue();
if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
LogicalVariable decorVar = GroupByOperator.getDecorVariable(p);
if (inter.contains(decorVar)) {
inter.remove(decorVar);
LogicalVariable v1 = ((VariableReferenceExpression) e).getVariableReference();
varMappings.add(new Pair<>(decorVar, v1));
}
}
}
// Only if every dependency on the group-by was such a decor variable can the access be pushed.
if (inter.isEmpty()) {
boolean changed = false;
for (Pair<LogicalVariable, LogicalVariable> m : varMappings) {
// The access now assigns a fresh variable v2; the group-by gets a new decor entry
// that exposes v2 under the access's previous output variable, and the access is
// rewritten to use the underlying variable instead of the decor variable.
LogicalVariable v2 = context.newVar();
LogicalVariable oldVar = access.getVariables().get(0);
g.getDecorList().add(new Pair<LogicalVariable, Mutable<ILogicalExpression>>(oldVar, new MutableObject<ILogicalExpression>(new VariableReferenceExpression(v2))));
changed = true;
access.getVariables().set(0, v2);
VariableUtilities.substituteVariables(access, m.first, m.second, context);
}
if (changed) {
context.computeAndSetTypeEnvironmentForOperator(g);
}
// The substitution changed which variables the access uses; recompute them.
usedInAccess.clear();
VariableUtilities.getUsedVariables(access, usedInAccess);
pushItDown = true;
}
}
if (pushItDown) {
// A nested tuple source is pushed through to the first input of the operator that its
// data source reference points to.
if (op2.getOperatorTag() == LogicalOperatorTag.NESTEDTUPLESOURCE) {
Mutable<ILogicalOperator> childOfSubplan = ((NestedTupleSourceOperator) op2).getDataSourceReference().getValue().getInputs().get(0);
pushAccessDown(opRef, op2, childOfSubplan, context, finalAnnot);
return true;
}
if (op2.getInputs().size() == 1 && !op2.hasNestedPlans()) {
pushAccessDown(opRef, op2, op2.getInputs().get(0), context, finalAnnot);
return true;
} else {
// Otherwise pick the first input whose live variables cover everything the access uses.
for (Mutable<ILogicalOperator> inp : op2.getInputs()) {
HashSet<LogicalVariable> v2 = new HashSet<>();
VariableUtilities.getLiveVariables(inp.getValue(), v2);
if (v2.containsAll(usedInAccess)) {
pushAccessDown(opRef, op2, inp, context, finalAnnot);
return true;
}
}
}
// No regular input qualified; try the roots of op2's nested plans in the same way.
if (op2.hasNestedPlans()) {
AbstractOperatorWithNestedPlans nestedOp = (AbstractOperatorWithNestedPlans) op2;
for (ILogicalPlan plan : nestedOp.getNestedPlans()) {
for (Mutable<ILogicalOperator> root : plan.getRoots()) {
HashSet<LogicalVariable> v2 = new HashSet<>();
VariableUtilities.getLiveVariables(root.getValue(), v2);
if (v2.containsAll(usedInAccess)) {
pushAccessDown(opRef, op2, root, context, finalAnnot);
return true;
}
}
}
}
throw new AlgebricksException("Field access " + access.getExpressions().get(0).getValue() + " does not correspond to any input of operator " + op2);
} else {
// The access depends on op2. If op2 is a data source scan, check whether the accessed
// field is one of the partitioning key fields. If yes, we can equate the two variables.
if (op2.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
DataSourceScanOperator scan = (DataSourceScanOperator) op2;
int n = scan.getVariables().size();
// The last scan variable is taken to be the record variable.
LogicalVariable scanRecordVar = scan.getVariables().get(n - 1);
AbstractFunctionCallExpression accessFun = (AbstractFunctionCallExpression) access.getExpressions().get(0).getValue();
ILogicalExpression e0 = accessFun.getArguments().get(0).getValue();
LogicalExpressionTag tag = e0.getExpressionTag();
if (tag == LogicalExpressionTag.VARIABLE) {
VariableReferenceExpression varRef = (VariableReferenceExpression) e0;
if (varRef.getVariableReference() == scanRecordVar) {
// Only accesses whose second argument (the field designator) is a constant are handled.
ILogicalExpression e1 = accessFun.getArguments().get(1).getValue();
if (e1.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
IDataSource<DataSourceId> dataSource = (IDataSource<DataSourceId>) scan.getDataSource();
byte dsType = ((DataSource) dataSource).getDatasourceType();
// Feed and loadable data sources are left alone (and not marked final).
if (dsType == DataSource.Type.FEED || dsType == DataSource.Type.LOADABLE) {
return false;
}
DataSourceId asid = dataSource.getId();
MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
Dataset dataset = mp.findDataset(asid.getDataverseName(), asid.getDatasourceName());
if (dataset == null) {
throw new AlgebricksException("Dataset " + asid.getDatasourceName() + " not found.");
}
if (dataset.getDatasetType() != DatasetType.INTERNAL) {
setAsFinal(access, context, finalAnnot);
return false;
}
ConstantExpression ce = (ConstantExpression) e1;
IAObject obj = ((AsterixConstantValue) ce.getValue()).getObject();
// The field is designated either by name (string constant) or by position; a
// non-string constant is cast to AInt32 and resolved against the dataset's item type.
String fldName;
if (obj.getType().getTypeTag() == ATypeTag.STRING) {
fldName = ((AString) obj).getStringValue();
} else {
int pos = ((AInt32) obj).getIntegerValue();
String tName = dataset.getItemTypeName();
IAType t = mp.findType(dataset.getItemTypeDataverseName(), tName);
if (t.getTypeTag() != ATypeTag.OBJECT) {
return false;
}
ARecordType rt = (ARecordType) t;
// Position beyond the declared fields of the record type: nothing to equate.
if (pos >= rt.getFieldNames().length) {
setAsFinal(access, context, finalAnnot);
return false;
}
fldName = rt.getFieldNames()[pos];
}
int p = DatasetUtil.getPositionOfPartitioningKeyField(dataset, fldName);
if (p < 0) {
// not one of the partitioning fields
setAsFinal(access, context, finalAnnot);
return false;
}
// Replace the field access by a reference to the scan variable at the key's position.
LogicalVariable keyVar = scan.getVariables().get(p);
access.getExpressions().get(0).setValue(new VariableReferenceExpression(keyVar));
return true;
}
}
}
}
setAsFinal(access, context, finalAnnot);
return false;
}
}
use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.
the class SimilarityCheckRule method getSimilarityCheckExpr.
/**
 * Builds the similarity-check function call that replaces a comparison between a
 * similarity function and a constant threshold.
 * <p>
 * For {@code SIMILARITY_JACCARD} compared with GE or GT, a
 * {@code SIMILARITY_JACCARD_CHECK} call is produced; for {@code EDIT_DISTANCE} compared
 * with LE or LT, an {@code EDIT_DISTANCE_CHECK} call is produced. In both cases the
 * arguments of the original call are kept and the (possibly adjusted) threshold constant
 * is appended as the last argument. Annotations of the original call are copied over.
 *
 * @param normFuncIdent comparison function identifier (GE/GT/LE/LT)
 * @param constVal constant the similarity function is compared against
 * @param funcExpr the similarity function call
 * @return the check-function call, or {@code null} if the function/comparison combination
 *         is not handled
 * @throws AlgebricksException if the edit-distance threshold cannot be converted to an integer
 */
private ScalarFunctionCallExpression getSimilarityCheckExpr(FunctionIdentifier normFuncIdent, AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) throws AlgebricksException {
    // Remember args from original similarity function to add them to the similarity-check function later.
    ArrayList<Mutable<ILogicalExpression>> similarityArgs = null;
    ScalarFunctionCallExpression simCheckFuncExpr = null;
    // Look for jaccard function call, and GE or GT.
    if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.SIMILARITY_JACCARD) {
        IAObject jaccThresh;
        if (normFuncIdent == AlgebricksBuiltinFunctions.GE) {
            if (constVal.getObject() instanceof AFloat) {
                jaccThresh = constVal.getObject();
            } else {
                jaccThresh = new AFloat((float) ((ADouble) constVal.getObject()).getDoubleValue());
            }
        } else if (normFuncIdent == AlgebricksBuiltinFunctions.GT) {
            float threshVal;
            if (constVal.getObject() instanceof AFloat) {
                threshVal = ((AFloat) constVal.getObject()).getFloatValue();
            } else {
                threshVal = (float) ((ADouble) constVal.getObject()).getDoubleValue();
            }
            // Turn the strict bound into an inclusive one by moving to the next representable
            // float. Adding Float.MIN_VALUE does not do that: for any normal float the sum
            // rounds back to the same value, which would silently turn '>' into '>='.
            float f = Math.nextUp(threshVal);
            // Keep the threshold capped at 1.0f, as before.
            if (f > 1.0f) {
                f = 1.0f;
            }
            jaccThresh = new AFloat(f);
        } else {
            return null;
        }
        similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
        similarityArgs.addAll(funcExpr.getArguments());
        similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(jaccThresh))));
        simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.SIMILARITY_JACCARD_CHECK), similarityArgs);
    }
    // Look for edit-distance function call, and LE or LT.
    if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE) {
        // The threshold constant may be of any numeric type; normalize it to an integer.
        AInt32 aInt;
        try {
            aInt = (AInt32) ATypeHierarchy.convertNumericTypeObject(constVal.getObject(), ATypeTag.INTEGER);
        } catch (HyracksDataException e) {
            throw new AlgebricksException(e);
        }
        AInt32 edThresh;
        if (normFuncIdent == AlgebricksBuiltinFunctions.LE) {
            edThresh = aInt;
        } else if (normFuncIdent == AlgebricksBuiltinFunctions.LT) {
            // '< k' on an integer distance is '<= k - 1'; the result is not allowed to go below 0.
            int ed = aInt.getIntegerValue() - 1;
            if (ed < 0) {
                ed = 0;
            }
            edThresh = new AInt32(ed);
        } else {
            return null;
        }
        similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
        similarityArgs.addAll(funcExpr.getArguments());
        similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(edThresh))));
        simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.EDIT_DISTANCE_CHECK), similarityArgs);
    }
    // Preserve all annotations.
    if (simCheckFuncExpr != null) {
        simCheckFuncExpr.getAnnotations().putAll(funcExpr.getAnnotations());
    }
    return simCheckFuncExpr;
}
use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.
the class AccessMethodUtils method checkEachElementInFTSearchListPredicate.
/**
 * Iterates over the list behind the given cursor and validates every element of a
 * full-text search list predicate: each element must have type {@code BuiltinType.ASTRING},
 * and its string value is handed to the string-phrase check.
 *
 * @param oListCursor cursor over the list elements
 * @throws AlgebricksException if an element is not a string (raised as a
 *         {@code CompilationException}), or if the string-phrase check rejects a value
 */
private static void checkEachElementInFTSearchListPredicate(IACursor oListCursor) throws AlgebricksException {
    while (oListCursor.next()) {
        IAObject item = oListCursor.get();
        // Anything other than a string is unsupported for this function.
        if (item.getType() != BuiltinType.ASTRING) {
            throw new CompilationException(ErrorCode.COMPILATION_TYPE_UNSUPPORTED,
                    BuiltinFunctions.FULLTEXT_CONTAINS.getName(), item.getType().getTypeTag());
        }
        checkAndGenerateFTSearchExceptionForStringPhrase(ConstantExpressionUtil.getStringConstant(item));
    }
}
Aggregations