Search in sources :

Example 6 with IAObject

use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.

the class InvertedIndexPOperator method buildInvertedIndexRuntime.

public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec, AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput, boolean retainMissing, String datasetName, Dataset dataset, String indexName, ATypeTag searchKeyType, int[] keyFields, SearchModifierType searchModifierType, IAlgebricksConstantValue similarityThreshold, int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery) throws AlgebricksException {
    try {
        IAObject simThresh = ((AsterixConstantValue) similarityThreshold).getObject();
        int numPrimaryKeys = dataset.getPrimaryKeys().size();
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        if (secondaryIndex == null) {
            throw new AlgebricksException("Code generation error: no index " + indexName + " for dataset " + datasetName);
        }
        IVariableTypeEnvironment typeEnv = context.getTypeEnvironment(unnestMap);
        RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
        // TODO: Here we assume there is only one search key field.
        int queryField = keyFields[0];
        // Get tokenizer and search modifier factories.
        IInvertedIndexSearchModifierFactory searchModifierFactory = InvertedIndexAccessMethod.getSearchModifierFactory(searchModifierType, simThresh, secondaryIndex);
        IBinaryTokenizerFactory queryTokenizerFactory = InvertedIndexAccessMethod.getBinaryTokenizerFactory(searchModifierType, searchKeyType, secondaryIndex);
        IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondarySplitsAndConstraint.first);
        LSMInvertedIndexSearchOperatorDescriptor invIndexSearchOp = new LSMInvertedIndexSearchOperatorDescriptor(jobSpec, outputRecDesc, queryField, dataflowHelperFactory, queryTokenizerFactory, searchModifierFactory, retainInput, retainMissing, context.getMissingWriterFactory(), dataset.getSearchCallbackFactory(metadataProvider.getStorageComponentProvider(), secondaryIndex, ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId(), IndexOperation.SEARCH, null), minFilterFieldIndexes, maxFilterFieldIndexes, isFullTextSearchQuery, numPrimaryKeys, false);
        return new Pair<>(invIndexSearchOp, secondarySplitsAndConstraint.second);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IAObject(org.apache.asterix.om.base.IAObject) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) IInvertedIndexSearchModifierFactory(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifierFactory) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) LSMInvertedIndexSearchOperatorDescriptor(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorDescriptor) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 7 with IAObject

use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.

the class FuzzyUtils method getSimThreshold.

public static IAObject getSimThreshold(MetadataProvider metadata, String simFuncName) {
    String simThresholValue = metadata.getPropertyValue(SIM_THRESHOLD_PROP_NAME);
    IAObject ret = null;
    if (simFuncName.equals(JACCARD_FUNCTION_NAME)) {
        if (simThresholValue != null) {
            float jaccThresh = Float.parseFloat(simThresholValue);
            ret = new AFloat(jaccThresh);
        } else {
            ret = new AFloat(JACCARD_DEFAULT_SIM_THRESHOLD);
        }
    } else if (simFuncName.equals(EDIT_DISTANCE_FUNCTION_NAME)) {
        if (simThresholValue != null) {
            int edThresh = Integer.parseInt(simThresholValue);
            ret = new AInt32(edThresh);
        } else {
            ret = new AFloat(EDIT_DISTANCE_DEFAULT_SIM_THRESHOLD);
        }
    }
    return ret;
}
Also used : AFloat(org.apache.asterix.om.base.AFloat) IAObject(org.apache.asterix.om.base.IAObject) AInt32(org.apache.asterix.om.base.AInt32)

Example 8 with IAObject

use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.

the class PushFieldAccessRule method propagateFieldAccessRec.

@SuppressWarnings("unchecked")
private boolean propagateFieldAccessRec(Mutable<ILogicalOperator> opRef, IOptimizationContext context, String finalAnnot) throws AlgebricksException {
    AssignOperator access = (AssignOperator) opRef.getValue();
    Mutable<ILogicalOperator> opRef2 = access.getInputs().get(0);
    AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
    // rewritten into index search.
    if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT || context.checkAndAddToAlreadyCompared(access, op2) && !(op2.getOperatorTag() == LogicalOperatorTag.SELECT && isAccessToIndexedField(access, context))) {
        return false;
    }
    Object annotation = op2.getAnnotations().get(IS_MOVABLE);
    if (annotation != null && !((Boolean) annotation)) {
        return false;
    }
    if (tryingToPushThroughSelectionWithSameDataSource(access, op2)) {
        return false;
    }
    if (testAndModifyRedundantOp(access, op2)) {
        propagateFieldAccessRec(opRef2, context, finalAnnot);
        return true;
    }
    List<LogicalVariable> usedInAccess = new LinkedList<>();
    VariableUtilities.getUsedVariables(access, usedInAccess);
    List<LogicalVariable> produced2 = new LinkedList<>();
    if (op2.getOperatorTag() == LogicalOperatorTag.GROUP) {
        VariableUtilities.getLiveVariables(op2, produced2);
    } else {
        VariableUtilities.getProducedVariables(op2, produced2);
    }
    boolean pushItDown = false;
    List<LogicalVariable> inter = new ArrayList<>(usedInAccess);
    if (inter.isEmpty()) {
        // ground value
        return false;
    }
    inter.retainAll(produced2);
    if (inter.isEmpty()) {
        pushItDown = true;
    } else if (op2.getOperatorTag() == LogicalOperatorTag.GROUP) {
        GroupByOperator g = (GroupByOperator) op2;
        List<Pair<LogicalVariable, LogicalVariable>> varMappings = new ArrayList<>();
        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : g.getDecorList()) {
            ILogicalExpression e = p.second.getValue();
            if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                LogicalVariable decorVar = GroupByOperator.getDecorVariable(p);
                if (inter.contains(decorVar)) {
                    inter.remove(decorVar);
                    LogicalVariable v1 = ((VariableReferenceExpression) e).getVariableReference();
                    varMappings.add(new Pair<>(decorVar, v1));
                }
            }
        }
        if (inter.isEmpty()) {
            boolean changed = false;
            for (Pair<LogicalVariable, LogicalVariable> m : varMappings) {
                LogicalVariable v2 = context.newVar();
                LogicalVariable oldVar = access.getVariables().get(0);
                g.getDecorList().add(new Pair<LogicalVariable, Mutable<ILogicalExpression>>(oldVar, new MutableObject<ILogicalExpression>(new VariableReferenceExpression(v2))));
                changed = true;
                access.getVariables().set(0, v2);
                VariableUtilities.substituteVariables(access, m.first, m.second, context);
            }
            if (changed) {
                context.computeAndSetTypeEnvironmentForOperator(g);
            }
            usedInAccess.clear();
            VariableUtilities.getUsedVariables(access, usedInAccess);
            pushItDown = true;
        }
    }
    if (pushItDown) {
        if (op2.getOperatorTag() == LogicalOperatorTag.NESTEDTUPLESOURCE) {
            Mutable<ILogicalOperator> childOfSubplan = ((NestedTupleSourceOperator) op2).getDataSourceReference().getValue().getInputs().get(0);
            pushAccessDown(opRef, op2, childOfSubplan, context, finalAnnot);
            return true;
        }
        if (op2.getInputs().size() == 1 && !op2.hasNestedPlans()) {
            pushAccessDown(opRef, op2, op2.getInputs().get(0), context, finalAnnot);
            return true;
        } else {
            for (Mutable<ILogicalOperator> inp : op2.getInputs()) {
                HashSet<LogicalVariable> v2 = new HashSet<>();
                VariableUtilities.getLiveVariables(inp.getValue(), v2);
                if (v2.containsAll(usedInAccess)) {
                    pushAccessDown(opRef, op2, inp, context, finalAnnot);
                    return true;
                }
            }
        }
        if (op2.hasNestedPlans()) {
            AbstractOperatorWithNestedPlans nestedOp = (AbstractOperatorWithNestedPlans) op2;
            for (ILogicalPlan plan : nestedOp.getNestedPlans()) {
                for (Mutable<ILogicalOperator> root : plan.getRoots()) {
                    HashSet<LogicalVariable> v2 = new HashSet<>();
                    VariableUtilities.getLiveVariables(root.getValue(), v2);
                    if (v2.containsAll(usedInAccess)) {
                        pushAccessDown(opRef, op2, root, context, finalAnnot);
                        return true;
                    }
                }
            }
        }
        throw new AlgebricksException("Field access " + access.getExpressions().get(0).getValue() + " does not correspond to any input of operator " + op2);
    } else {
        // fields. If yes, we can equate the two variables.
        if (op2.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
            DataSourceScanOperator scan = (DataSourceScanOperator) op2;
            int n = scan.getVariables().size();
            LogicalVariable scanRecordVar = scan.getVariables().get(n - 1);
            AbstractFunctionCallExpression accessFun = (AbstractFunctionCallExpression) access.getExpressions().get(0).getValue();
            ILogicalExpression e0 = accessFun.getArguments().get(0).getValue();
            LogicalExpressionTag tag = e0.getExpressionTag();
            if (tag == LogicalExpressionTag.VARIABLE) {
                VariableReferenceExpression varRef = (VariableReferenceExpression) e0;
                if (varRef.getVariableReference() == scanRecordVar) {
                    ILogicalExpression e1 = accessFun.getArguments().get(1).getValue();
                    if (e1.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
                        IDataSource<DataSourceId> dataSource = (IDataSource<DataSourceId>) scan.getDataSource();
                        byte dsType = ((DataSource) dataSource).getDatasourceType();
                        if (dsType == DataSource.Type.FEED || dsType == DataSource.Type.LOADABLE) {
                            return false;
                        }
                        DataSourceId asid = dataSource.getId();
                        MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
                        Dataset dataset = mp.findDataset(asid.getDataverseName(), asid.getDatasourceName());
                        if (dataset == null) {
                            throw new AlgebricksException("Dataset " + asid.getDatasourceName() + " not found.");
                        }
                        if (dataset.getDatasetType() != DatasetType.INTERNAL) {
                            setAsFinal(access, context, finalAnnot);
                            return false;
                        }
                        ConstantExpression ce = (ConstantExpression) e1;
                        IAObject obj = ((AsterixConstantValue) ce.getValue()).getObject();
                        String fldName;
                        if (obj.getType().getTypeTag() == ATypeTag.STRING) {
                            fldName = ((AString) obj).getStringValue();
                        } else {
                            int pos = ((AInt32) obj).getIntegerValue();
                            String tName = dataset.getItemTypeName();
                            IAType t = mp.findType(dataset.getItemTypeDataverseName(), tName);
                            if (t.getTypeTag() != ATypeTag.OBJECT) {
                                return false;
                            }
                            ARecordType rt = (ARecordType) t;
                            if (pos >= rt.getFieldNames().length) {
                                setAsFinal(access, context, finalAnnot);
                                return false;
                            }
                            fldName = rt.getFieldNames()[pos];
                        }
                        int p = DatasetUtil.getPositionOfPartitioningKeyField(dataset, fldName);
                        if (p < 0) {
                            // not one of the partitioning fields
                            setAsFinal(access, context, finalAnnot);
                            return false;
                        }
                        LogicalVariable keyVar = scan.getVariables().get(p);
                        access.getExpressions().get(0).setValue(new VariableReferenceExpression(keyVar));
                        return true;
                    }
                }
            }
        }
        setAsFinal(access, context, finalAnnot);
        return false;
    }
}
Also used : ConstantExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression) ArrayList(java.util.ArrayList) AString(org.apache.asterix.om.base.AString) DataSourceScanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Pair(org.apache.hyracks.algebricks.common.utils.Pair) HashSet(java.util.HashSet) AbstractOperatorWithNestedPlans(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) IAObject(org.apache.asterix.om.base.IAObject) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) LinkedList(java.util.LinkedList) AInt32(org.apache.asterix.om.base.AInt32) IDataSource(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource) IDataSource(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource) DataSource(org.apache.asterix.metadata.declared.DataSource) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) LogicalExpressionTag(org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) IAObject(org.apache.asterix.om.base.IAObject) MutableObject(org.apache.commons.lang3.mutable.MutableObject) ARecordType(org.apache.asterix.om.types.ARecordType) DataSourceId(org.apache.asterix.metadata.declared.DataSourceId) IAType(org.apache.asterix.om.types.IAType)

Example 9 with IAObject

use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.

the class SimilarityCheckRule method getSimilarityCheckExpr.

private ScalarFunctionCallExpression getSimilarityCheckExpr(FunctionIdentifier normFuncIdent, AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) throws AlgebricksException {
    // Remember args from original similarity function to add them to the similarity-check function later.
    ArrayList<Mutable<ILogicalExpression>> similarityArgs = null;
    ScalarFunctionCallExpression simCheckFuncExpr = null;
    // Look for jaccard function call, and GE or GT.
    if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.SIMILARITY_JACCARD) {
        IAObject jaccThresh;
        if (normFuncIdent == AlgebricksBuiltinFunctions.GE) {
            if (constVal.getObject() instanceof AFloat) {
                jaccThresh = constVal.getObject();
            } else {
                jaccThresh = new AFloat((float) ((ADouble) constVal.getObject()).getDoubleValue());
            }
        } else if (normFuncIdent == AlgebricksBuiltinFunctions.GT) {
            float threshVal = 0.0f;
            if (constVal.getObject() instanceof AFloat) {
                threshVal = ((AFloat) constVal.getObject()).getFloatValue();
            } else {
                threshVal = (float) ((ADouble) constVal.getObject()).getDoubleValue();
            }
            float f = threshVal + Float.MIN_VALUE;
            if (f > 1.0f)
                f = 1.0f;
            jaccThresh = new AFloat(f);
        } else {
            return null;
        }
        similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
        similarityArgs.addAll(funcExpr.getArguments());
        similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(jaccThresh))));
        simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.SIMILARITY_JACCARD_CHECK), similarityArgs);
    }
    // Look for edit-distance function call, and LE or LT.
    if (funcExpr.getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE) {
        AInt32 aInt = new AInt32(0);
        try {
            aInt = (AInt32) ATypeHierarchy.convertNumericTypeObject(constVal.getObject(), ATypeTag.INTEGER);
        } catch (HyracksDataException e) {
            throw new AlgebricksException(e);
        }
        AInt32 edThresh;
        if (normFuncIdent == AlgebricksBuiltinFunctions.LE) {
            edThresh = aInt;
        } else if (normFuncIdent == AlgebricksBuiltinFunctions.LT) {
            int ed = aInt.getIntegerValue() - 1;
            if (ed < 0)
                ed = 0;
            edThresh = new AInt32(ed);
        } else {
            return null;
        }
        similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
        similarityArgs.addAll(funcExpr.getArguments());
        similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(edThresh))));
        simCheckFuncExpr = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.EDIT_DISTANCE_CHECK), similarityArgs);
    }
    // Preserve all annotations.
    if (simCheckFuncExpr != null) {
        simCheckFuncExpr.getAnnotations().putAll(funcExpr.getAnnotations());
    }
    return simCheckFuncExpr;
}
Also used : Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) AFloat(org.apache.asterix.om.base.AFloat) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) IAObject(org.apache.asterix.om.base.IAObject) ConstantExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AInt32(org.apache.asterix.om.base.AInt32) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression)

Example 10 with IAObject

use of org.apache.asterix.om.base.IAObject in project asterixdb by apache.

the class AccessMethodUtils method checkEachElementInFTSearchListPredicate.

/**
     * Fetches each element and calls the check for the type and value in the given list using the given cursor.
     */
private static void checkEachElementInFTSearchListPredicate(IACursor oListCursor) throws AlgebricksException {
    String argValue;
    IAObject element;
    while (oListCursor.next()) {
        element = oListCursor.get();
        if (element.getType() == BuiltinType.ASTRING) {
            argValue = ConstantExpressionUtil.getStringConstant(element);
            checkAndGenerateFTSearchExceptionForStringPhrase(argValue);
        } else {
            throw new CompilationException(ErrorCode.COMPILATION_TYPE_UNSUPPORTED, BuiltinFunctions.FULLTEXT_CONTAINS.getName(), element.getType().getTypeTag());
        }
    }
}
Also used : CompilationException(org.apache.asterix.common.exceptions.CompilationException) IAObject(org.apache.asterix.om.base.IAObject) AString(org.apache.asterix.om.base.AString)

Aggregations

IAObject (org.apache.asterix.om.base.IAObject)27 AsterixConstantValue (org.apache.asterix.om.constants.AsterixConstantValue)16 AString (org.apache.asterix.om.base.AString)12 ConstantExpression (org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression)10 ATypeTag (org.apache.asterix.om.types.ATypeTag)9 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)8 ArrayList (java.util.ArrayList)7 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)7 AOrderedList (org.apache.asterix.om.base.AOrderedList)5 IAType (org.apache.asterix.om.types.IAType)5 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)5 ScalarFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression)5 AInt32 (org.apache.asterix.om.base.AInt32)4 AbstractFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression)4 DataOutput (java.io.DataOutput)3 ARecordType (org.apache.asterix.om.types.ARecordType)3 Mutable (org.apache.commons.lang3.mutable.Mutable)3 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)3 DataInput (java.io.DataInput)2 IOException (java.io.IOException)2