
Example 26 with Index

use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.

the class DatasetDataSource method buildDatasourceScanRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    switch(dataset.getDatasetType()) {
        case EXTERNAL:
            Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset();
            String itemTypeName = externalDataset.getItemTypeName();
            IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
            ExternalDatasetDetails edd = (ExternalDatasetDetails) externalDataset.getDatasetDetails();
            IAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset, edd.getAdapter(), edd.getProperties(), (ARecordType) itemType, null);
            return metadataProvider.buildExternalDatasetDataScannerRuntime(jobSpec, itemType, adapterFactory, NonTaggedDataFormat.INSTANCE);
        case INTERNAL:
            DataSourceId id = getId();
            String dataverseName = id.getDataverseName();
            String datasetName = id.getDatasourceName();
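            // By convention, a dataset's primary index carries the dataset's own name,
            // which is why datasetName is passed below as the index name.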
            Index primaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, datasetName);
            int[] minFilterFieldIndexes = createFilterIndexes(minFilterVars, opSchema);
            int[] maxFilterFieldIndexes = createFilterIndexes(maxFilterVars, opSchema);
            return metadataProvider.buildBtreeRuntime(jobSpec, opSchema, typeEnv, context, true, false, ((DatasetDataSource) dataSource).getDataset(), primaryIndex.getIndexName(), null, null, true, true, minFilterFieldIndexes, maxFilterFieldIndexes);
        default:
            throw new AlgebricksException("Unknown datasource type");
    }
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IAdapterFactory(org.apache.asterix.external.api.IAdapterFactory) Index(org.apache.asterix.metadata.entities.Index) IAType(org.apache.asterix.om.types.IAType)
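The helper createFilterIndexes used in the INTERNAL branch is not shown on this page. A minimal sketch, assuming it simply maps each filter variable to its column position in the operator schema via IOperatorSchema.findVariable and returns null when the dataset has no filter fields (the project's actual implementation may differ):

private static int[] createFilterIndexes(List<LogicalVariable> filterVars, IOperatorSchema opSchema) {
    if (filterVars == null || filterVars.isEmpty()) {
        // No "filter with" fields on the dataset: pass null to the B-tree runtime.
        return null;
    }
    int[] filterFieldIndexes = new int[filterVars.size()];
    for (int i = 0; i < filterVars.size(); i++) {
        // findVariable returns the column of the variable in the operator schema.
        filterFieldIndexes[i] = opSchema.findVariable(filterVars.get(i));
    }
    return filterFieldIndexes;
}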

Example 27 with Index

use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.

the class IntroduceSecondaryIndexInsertDeleteRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
    if (op0.getOperatorTag() != LogicalOperatorTag.DELEGATE_OPERATOR && op0.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    if (op0.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator eOp = (DelegateOperator) op0;
        if (!(eOp.getDelegate() instanceof CommitOperator)) {
            return false;
        }
    }
    AbstractLogicalOperator op1 = (AbstractLogicalOperator) op0.getInputs().get(0).getValue();
    if (op1.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    /** find the record variable */
    InsertDeleteUpsertOperator primaryIndexModificationOp = (InsertDeleteUpsertOperator) op0.getInputs().get(0).getValue();
    boolean isBulkload = primaryIndexModificationOp.isBulkload();
    ILogicalExpression newRecordExpr = primaryIndexModificationOp.getPayloadExpression().getValue();
    List<Mutable<ILogicalExpression>> newMetaExprs = primaryIndexModificationOp.getAdditionalNonFilteringExpressions();
    LogicalVariable newRecordVar;
    LogicalVariable newMetaVar = null;
    /**
         * inputOp is the assign operator which extracts primary keys from the input
         * variables (record or meta)
         */
    AbstractLogicalOperator inputOp = (AbstractLogicalOperator) primaryIndexModificationOp.getInputs().get(0).getValue();
    newRecordVar = getRecordVar(context, inputOp, newRecordExpr, 0);
    if (newMetaExprs != null && !newMetaExprs.isEmpty()) {
        if (newMetaExprs.size() > 1) {
            throw new AlgebricksException("Number of meta records can't be more than 1. Number of meta records found = " + newMetaExprs.size());
        }
        newMetaVar = getRecordVar(context, inputOp, newMetaExprs.get(0).getValue(), 1);
    }
    /*
         * At this point, we have the record variable and the insert/delete/upsert operator
         * Note: We have two operators:
         * 1. An InsertDeleteOperator (primary)
         * 2. An IndexInsertDeleteOperator (secondary)
         * The current primaryIndexModificationOp is of the first type
         */
    DataSource datasetSource = (DataSource) primaryIndexModificationOp.getDataSource();
    MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
    String dataverseName = datasetSource.getId().getDataverseName();
    String datasetName = datasetSource.getId().getDatasourceName();
    Dataset dataset = mp.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AlgebricksException("Unknown dataset " + datasetName + " in dataverse " + dataverseName);
    }
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return false;
    }
    // Create operators for secondary index insert / delete.
    String itemTypeName = dataset.getItemTypeName();
    IAType itemType = mp.findType(dataset.getItemTypeDataverseName(), itemTypeName);
    if (itemType.getTypeTag() != ATypeTag.OBJECT) {
        throw new AlgebricksException("Only record types can be indexed.");
    }
    ARecordType recType = (ARecordType) itemType;
    // meta type
    ARecordType metaType = null;
    if (dataset.hasMetaPart()) {
        metaType = (ARecordType) mp.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    }
    List<Index> indexes = mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName());
    // Set the top operator pointer to the primary InsertDeleteUpsertOperator
    ILogicalOperator currentTop = primaryIndexModificationOp;
    boolean hasSecondaryIndex = false;
    // Put an n-gram or a keyword index in the later stage of index-update,
    // since TokenizeOperator needs to be involved.
    Collections.sort(indexes, (o1, o2) -> o1.getIndexType().ordinal() - o2.getIndexType().ordinal());
    // At this point, we have the data type info, and the indexes info as well
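    // getDatasetIndexes returns the primary index as well, hence the -1 below to count only secondary indexes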
    int secondaryIndexTotalCnt = indexes.size() - 1;
    if (secondaryIndexTotalCnt > 0) {
        op0.getInputs().clear();
    } else {
        return false;
    }
    // Initialize inputs to the SINK operator. Op0 (the SINK) is now without inputs.
    // Prepare filtering field information (This is the filter created using the "filter with" key word in the
    // create dataset ddl)
    List<String> filteringFields = ((InternalDatasetDetails) dataset.getDatasetDetails()).getFilterField();
    List<LogicalVariable> filteringVars;
    List<Mutable<ILogicalExpression>> filteringExpressions = null;
    if (filteringFields != null) {
        // The filter field var already exists; we can simply get it from the insert op
        filteringVars = new ArrayList<>();
        filteringExpressions = new ArrayList<>();
        for (Mutable<ILogicalExpression> filteringExpression : primaryIndexModificationOp.getAdditionalFilteringExpressions()) {
            filteringExpression.getValue().getUsedVariables(filteringVars);
            for (LogicalVariable var : filteringVars) {
                filteringExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(var)));
            }
        }
    }
    // A ReplicateOperator is applied only when doing a bulk load.
    ReplicateOperator replicateOp = null;
    if (secondaryIndexTotalCnt > 1 && primaryIndexModificationOp.isBulkload()) {
        // Split the logical plan into "each secondary index update branch"
        // to replicate each <PK,OBJECT> pair.
        replicateOp = new ReplicateOperator(secondaryIndexTotalCnt);
        replicateOp.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
        replicateOp.setExecutionMode(ExecutionMode.PARTITIONED);
        context.computeAndSetTypeEnvironmentForOperator(replicateOp);
        currentTop = replicateOp;
    }
    /*
         * The two maps are used to store variables to which [casted] field access is assigned.
         * One for the beforeOp record and the other for the new record.
         * There are two uses for these maps:
         * 1. used for shared fields in indexes with overlapping keys.
         * 2. used for setting variables of secondary keys for each secondary index operator.
         */
    Map<IndexFieldId, LogicalVariable> fieldVarsForBeforeOperation = new HashMap<>();
    Map<IndexFieldId, LogicalVariable> fieldVarsForNewRecord = new HashMap<>();
    /*
         * if the index is enforcing field types (For open indexes), We add a cast
         * operator to ensure type safety
         */
    try {
        if (primaryIndexModificationOp.getOperation() == Kind.INSERT
                || primaryIndexModificationOp.getOperation() == Kind.UPSERT
                /* Actually, DELETE should not be here, but it is until issue
                 * https://issues.apache.org/jira/browse/ASTERIXDB-1507 is solved. */
                || primaryIndexModificationOp.getOperation() == Kind.DELETE) {
            injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForNewRecord, recType, metaType, newRecordVar, newMetaVar, primaryIndexModificationOp, false);
            if (replicateOp != null) {
                context.computeAndSetTypeEnvironmentForOperator(replicateOp);
            }
        }
        /* Actually, DELETE should also be handled here, but it is not until issue
         * https://issues.apache.org/jira/browse/ASTERIXDB-1507 is solved. */
        if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
            List<LogicalVariable> beforeOpMetaVars = primaryIndexModificationOp.getBeforeOpAdditionalNonFilteringVars();
            LogicalVariable beforeOpMetaVar = beforeOpMetaVars == null ? null : beforeOpMetaVars.get(0);
            currentTop = injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForBeforeOperation, recType, metaType, primaryIndexModificationOp.getBeforeOpRecordVar(), beforeOpMetaVar, currentTop, true);
        }
    } catch (AsterixException e) {
        throw new AlgebricksException(e);
    }
    // Iterate over all indexes; each secondary index contributes an index update operator chained on currentTop
    for (Index index : indexes) {
        if (!index.isSecondaryIndex()) {
            continue;
        }
        hasSecondaryIndex = true;
        // Get the secondary fields names and types
        List<List<String>> secondaryKeyFields = index.getKeyFieldNames();
        List<LogicalVariable> secondaryKeyVars = new ArrayList<>();
        List<Mutable<ILogicalExpression>> secondaryExpressions = new ArrayList<>();
        List<Mutable<ILogicalExpression>> beforeOpSecondaryExpressions = new ArrayList<>();
        ILogicalOperator replicateOutput;
        for (int i = 0; i < secondaryKeyFields.size(); i++) {
            IndexFieldId indexFieldId = new IndexFieldId(index.getKeyFieldSourceIndicators().get(i), secondaryKeyFields.get(i));
            LogicalVariable skVar = fieldVarsForNewRecord.get(indexFieldId);
            secondaryKeyVars.add(skVar);
            secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(skVar)));
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(fieldVarsForBeforeOperation.get(indexFieldId))));
            }
        }
        IndexInsertDeleteUpsertOperator indexUpdate;
        if (index.getIndexType() != IndexType.RTREE) {
            // Create an expression per key
            Mutable<ILogicalExpression> filterExpression = (primaryIndexModificationOp.getOperation() == Kind.UPSERT) ? null : createFilterExpression(secondaryKeyVars, context.getOutputTypeEnvironment(currentTop), index.isEnforcingKeyFileds());
            DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
            // A TokenizeOperator is introduced only when the operation is a bulk load
            // and the index type is keyword or n-gram (i.e., non-BTREE).
            if (index.getIndexType() != IndexType.BTREE && primaryIndexModificationOp.isBulkload()) {
                // Note: this is the bulk-load case only; upsert never reaches this branch
                // Check whether the index is length-partitioned or not.
                // If partitioned, [input variables to TokenizeOperator,
                // token, number of token] pairs will be generated and
                // fed into the IndexInsertDeleteOperator.
                // If not, [input variables, token] pairs will be generated
                // and fed into the IndexInsertDeleteOperator.
                // Input variables are passed through since TokenizeOperator is not a
                // filtering operator.
                boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
                // Create a new logical variable - token
                List<LogicalVariable> tokenizeKeyVars = new ArrayList<>();
                List<Mutable<ILogicalExpression>> tokenizeKeyExprs = new ArrayList<>();
                LogicalVariable tokenVar = context.newVar();
                tokenizeKeyVars.add(tokenVar);
                tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(tokenVar)));
                // Check the field type of the secondary key.
                IAType secondaryKeyType;
                Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
                secondaryKeyType = keyPairType.first;
                List<Object> varTypes = new ArrayList<>();
                varTypes.add(NonTaggedFormatUtil.getTokenType(secondaryKeyType));
                // The length type is a short that carries no type info (SHORTWITHOUTTYPEINFO).
                if (isPartitioned) {
                    LogicalVariable lengthVar = context.newVar();
                    tokenizeKeyVars.add(lengthVar);
                    tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(lengthVar)));
                    varTypes.add(BuiltinType.SHORTWITHOUTTYPEINFO);
                }
                // TokenizeOperator to tokenize [SK, PK] pairs
                TokenizeOperator tokenUpdate = new TokenizeOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, tokenizeKeyVars, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), isPartitioned, varTypes);
                tokenUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
                context.computeAndSetTypeEnvironmentForOperator(tokenUpdate);
                replicateOutput = tokenUpdate;
                indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), tokenizeKeyExprs, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
                indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(tokenUpdate));
            } else {
                // When TokenizeOperator is not needed
                indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
                indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
                replicateOutput = indexUpdate;
                // We add the necessary expressions for upsert
                if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                    indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
                    if (filteringFields != null) {
                        indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
                    }
                }
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
            }
        } else {
            // Get type, dimensions and number of keys
            Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
            IAType spatialType = keyPairType.first;
            boolean isPointMBR = spatialType.getTypeTag() == ATypeTag.POINT || spatialType.getTypeTag() == ATypeTag.POINT3D;
            int dimension = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
            int numKeys = (isPointMBR && isBulkload) ? dimension : dimension * 2;
            // Get variables and expressions
            List<LogicalVariable> keyVarList = new ArrayList<>();
            List<Mutable<ILogicalExpression>> keyExprList = new ArrayList<>();
            for (int i = 0; i < numKeys; i++) {
                LogicalVariable keyVar = context.newVar();
                keyVarList.add(keyVar);
                AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVars.get(0))));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
                keyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
            }
            secondaryExpressions.clear();
            for (LogicalVariable secondaryKeyVar : keyVarList) {
                secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
            }
            if (isPointMBR && isBulkload) {
                // In the point-MBR bulk-load case the key expressions are duplicated;
                // see createFieldPermutationForBulkLoadOp(int) for more details.
                for (LogicalVariable secondaryKeyVar : keyVarList) {
                    secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
                }
            }
            AssignOperator assignCoordinates = new AssignOperator(keyVarList, keyExprList);
            assignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
            context.computeAndSetTypeEnvironmentForOperator(assignCoordinates);
            replicateOutput = assignCoordinates;
            Mutable<ILogicalExpression> filterExpression = null;
            AssignOperator originalAssignCoordinates = null;
            // We do something similar for beforeOp key if the operation is an upsert
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                List<LogicalVariable> originalKeyVarList = new ArrayList<>();
                List<Mutable<ILogicalExpression>> originalKeyExprList = new ArrayList<>();
                // we don't do any filtering since nulls are expected here and there
                for (int i = 0; i < numKeys; i++) {
                    LogicalVariable keyVar = context.newVar();
                    originalKeyVarList.add(keyVar);
                    AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
                    createMBR.getArguments().add(beforeOpSecondaryExpressions.get(0));
                    createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
                    createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
                    originalKeyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
                }
                beforeOpSecondaryExpressions.clear();
                for (LogicalVariable secondaryKeyVar : originalKeyVarList) {
                    beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
                }
                originalAssignCoordinates = new AssignOperator(originalKeyVarList, originalKeyExprList);
                originalAssignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
                context.computeAndSetTypeEnvironmentForOperator(originalAssignCoordinates);
            } else {
                // We must enforce the filter if the originating spatial type is
                // nullable.
                boolean forceFilter = keyPairType.second;
                filterExpression = createFilterExpression(keyVarList, context.getOutputTypeEnvironment(assignCoordinates), forceFilter);
            }
            DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
            indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
            indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                // set the before-op additional filtering expression
                if (filteringFields != null) {
                    indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
                }
                // set the before-op secondary key expressions
                indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
                // assign --> assign beforeOp values --> secondary index upsert
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(originalAssignCoordinates));
            } else {
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
            }
        }
        context.computeAndSetTypeEnvironmentForOperator(indexUpdate);
        if (!primaryIndexModificationOp.isBulkload() || secondaryIndexTotalCnt == 1) {
            currentTop = indexUpdate;
        } else {
            replicateOp.getOutputs().add(new MutableObject<>(replicateOutput));
        }
        if (primaryIndexModificationOp.isBulkload()) {
            // For bulk load, we connect all fanned-out index update operators to a single SINK operator
            op0.getInputs().add(new MutableObject<ILogicalOperator>(indexUpdate));
        }
    }
    if (!hasSecondaryIndex) {
        return false;
    }
    if (!primaryIndexModificationOp.isBulkload()) {
        // Remove the current input to the SINK operator (it is actually already removed above)
        op0.getInputs().clear();
        // Connect the last index update to the SINK
        op0.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
    }
    return true;
}
Also used : HashMap(java.util.HashMap) ConstantExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression) ArrayList(java.util.ArrayList) Index(org.apache.asterix.metadata.entities.Index) DataSourceIndex(org.apache.asterix.metadata.declared.DataSourceIndex) AString(org.apache.asterix.om.base.AString) AsterixException(org.apache.asterix.common.exceptions.AsterixException) TokenizeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.TokenizeOperator) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) DelegateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator) List(java.util.List) AOrderedList(org.apache.asterix.om.base.AOrderedList) ArrayList(java.util.ArrayList) CommitOperator(org.apache.asterix.algebra.operators.CommitOperator) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression) ReplicateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) DataSource(org.apache.asterix.metadata.declared.DataSource) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) MutableObject(org.apache.commons.lang3.mutable.MutableObject) IAObject(org.apache.asterix.om.base.IAObject) IAType(org.apache.asterix.om.types.IAType) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) DataSourceIndex(org.apache.asterix.metadata.declared.DataSourceIndex) IndexInsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteUpsertOperator) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) AInt32(org.apache.asterix.om.base.AInt32) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) IndexInsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteUpsertOperator) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) ARecordType(org.apache.asterix.om.types.ARecordType)
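createFilterExpression, called above when building the non-RTREE and RTREE index update operators, is not reproduced on this page. A plausible sketch, assuming it ANDs not-is-missing guards over the optional secondary-key variables; BuiltinFunctions.NOT / IS_MISSING / AND and NonTaggedFormatUtil.isOptional are the assumed building blocks, and the project's real implementation may differ:

private static Mutable<ILogicalExpression> createFilterExpression(List<LogicalVariable> secondaryKeyVars, IVariableTypeEnvironment typeEnv, boolean forceFilter) throws AlgebricksException {
    List<Mutable<ILogicalExpression>> filterExpressions = new ArrayList<>();
    for (LogicalVariable skVar : secondaryKeyVars) {
        IAType type = (IAType) typeEnv.getVarType(skVar);
        // Only optional (nullable/open) keys need a guard, unless the caller forces one.
        if (forceFilter || NonTaggedFormatUtil.isOptional(type)) {
            ScalarFunctionCallExpression isMissing = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.IS_MISSING), new MutableObject<ILogicalExpression>(new VariableReferenceExpression(skVar)));
            filterExpressions.add(new MutableObject<ILogicalExpression>(new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.NOT), new MutableObject<ILogicalExpression>(isMissing))));
        }
    }
    if (filterExpressions.isEmpty()) {
        // Nothing to filter: the index update operator receives a null filter.
        return null;
    }
    // A single guard is used as-is; multiple guards are combined with AND.
    if (filterExpressions.size() == 1) {
        return filterExpressions.get(0);
    }
    return new MutableObject<ILogicalExpression>(new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.AND), filterExpressions));
}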

Example 28 with Index

use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.

the class IntroduceSecondaryIndexInsertDeleteRule method injectFieldAccessesForIndexes.

private ILogicalOperator injectFieldAccessesForIndexes(IOptimizationContext context, Dataset dataset, List<Index> indexes, Map<IndexFieldId, LogicalVariable> fieldAccessVars, ARecordType recType, ARecordType metaType, LogicalVariable recordVar, LogicalVariable metaVar, ILogicalOperator currentTop, boolean afterOp) throws AlgebricksException {
    List<LogicalVariable> vars = new ArrayList<>();
    List<Mutable<ILogicalExpression>> exprs = new ArrayList<>();
    for (Index index : indexes) {
        if (index.isPrimaryIndex()) {
            continue;
        }
        List<IAType> skTypes = index.getKeyFieldTypes();
        List<List<String>> skNames = index.getKeyFieldNames();
        List<Integer> indicators = index.getKeyFieldSourceIndicators();
        for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
            IndexFieldId indexFieldId = new IndexFieldId(indicators.get(i), skNames.get(i));
            if (fieldAccessVars.containsKey(indexFieldId)) {
                // already handled in a different index
                continue;
            }
            ARecordType sourceType = dataset.hasMetaPart() ? indicators.get(i).intValue() == Index.RECORD_INDICATOR ? recType : metaType : recType;
            LogicalVariable sourceVar = dataset.hasMetaPart() ? indicators.get(i).intValue() == Index.RECORD_INDICATOR ? recordVar : metaVar : recordVar;
            LogicalVariable fieldVar = context.newVar();
            // create record variable ref
            Mutable<ILogicalExpression> varRef = new MutableObject<>(new VariableReferenceExpression(sourceVar));
            IAType fieldType = sourceType.getSubFieldType(indexFieldId.fieldName);
            AbstractFunctionCallExpression theFieldAccessFunc;
            if (fieldType == null) {
                // Open field: must prevent inlining to maintain the cast before the primary op and to
                // make handling of records with an incorrect value type for this field easier and cleaner
                context.addNotToBeInlinedVar(fieldVar);
                // create field access
                AbstractFunctionCallExpression fieldAccessFunc = getOpenOrNestedFieldAccessFunction(varRef, indexFieldId.fieldName);
                // create cast
                theFieldAccessFunc = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CAST_TYPE));
                // The first argument is the field
                theFieldAccessFunc.getArguments().add(new MutableObject<ILogicalExpression>(fieldAccessFunc));
                TypeCastUtils.setRequiredAndInputTypes(theFieldAccessFunc, skTypes.get(i), BuiltinType.ANY);
            } else {
                // Get the desired field position
                int pos = indexFieldId.fieldName.size() > 1 ? -1 : sourceType.getFieldIndex(indexFieldId.fieldName.get(0));
                // Field not found --> this is either an open field or a nested field; it can't be accessed by position
                theFieldAccessFunc = (pos == -1) ? getOpenOrNestedFieldAccessFunction(varRef, indexFieldId.fieldName) : getClosedFieldAccessFunction(varRef, pos);
            }
            vars.add(fieldVar);
            exprs.add(new MutableObject<ILogicalExpression>(theFieldAccessFunc));
            fieldAccessVars.put(indexFieldId, fieldVar);
        }
    }
    // AssignOperator assigns secondary keys to their vars
    AssignOperator castedFieldAssignOperator = new AssignOperator(vars, exprs);
    return introduceNewOp(context, currentTop, castedFieldAssignOperator, afterOp);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ArrayList(java.util.ArrayList) Index(org.apache.asterix.metadata.entities.Index) DataSourceIndex(org.apache.asterix.metadata.declared.DataSourceIndex) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) List(java.util.List) AOrderedList(org.apache.asterix.om.base.AOrderedList) ArrayList(java.util.ArrayList) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType) MutableObject(org.apache.commons.lang3.mutable.MutableObject) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression)
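The two field-access helpers referenced above are not shown on this page. A minimal sketch, assuming the standard asterixdb builtins FIELD_ACCESS_BY_INDEX and FIELD_ACCESS_BY_NAME; the one-level-at-a-time wiring for nested paths is an assumption, and the real helpers may build a single nested-access call instead:

private static AbstractFunctionCallExpression getClosedFieldAccessFunction(Mutable<ILogicalExpression> varRef, int position) {
    // A closed field is read by its position in the record type.
    Mutable<ILogicalExpression> indexRef = new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(position))));
    return new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.FIELD_ACCESS_BY_INDEX), varRef, indexRef);
}

private static AbstractFunctionCallExpression getOpenOrNestedFieldAccessFunction(Mutable<ILogicalExpression> varRef, List<String> fieldName) {
    // An open field is read by name; a nested path is resolved one level at a time.
    Mutable<ILogicalExpression> nameRef = new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AString(fieldName.get(0)))));
    AbstractFunctionCallExpression access = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.FIELD_ACCESS_BY_NAME), varRef, nameRef);
    return fieldName.size() == 1 ? access : getOpenOrNestedFieldAccessFunction(new MutableObject<ILogicalExpression>(access), fieldName.subList(1, fieldName.size()));
}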

Example 29 with Index

use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.

the class AbstractIntroduceAccessMethodRule method pruneIndexCandidates.

/**
     * Removes irrelevant access method candidates, based on whether the
     * expressions in the query match those in the index. For example, some
     * index may require all its expressions to be matched, and some indexes may
     * only require a match on a prefix of fields to be applicable. This method
     * removes all index candidates in indexExprs that are definitely not
     * applicable according to the expressions involved.
     *
     * @throws AlgebricksException
     */
public void pruneIndexCandidates(IAccessMethod accessMethod, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, IVariableTypeEnvironment typeEnvironment) throws AlgebricksException {
    Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexExprAndVarIt = analysisCtx.getIteratorForIndexExprsAndVars();
    // Used to keep track of matched expressions (added for prefix search)
    int numMatchedKeys = 0;
    ArrayList<Integer> matchedExpressions = new ArrayList<>();
    while (indexExprAndVarIt.hasNext()) {
        Map.Entry<Index, List<Pair<Integer, Integer>>> indexExprAndVarEntry = indexExprAndVarIt.next();
        Index index = indexExprAndVarEntry.getKey();
        boolean allUsed = true;
        int lastFieldMatched = -1;
        boolean foundKeyField = false;
        matchedExpressions.clear();
        numMatchedKeys = 0;
        for (int i = 0; i < index.getKeyFieldNames().size(); i++) {
            List<String> keyField = index.getKeyFieldNames().get(i);
            final IAType keyType = index.getKeyFieldTypes().get(i);
            Iterator<Pair<Integer, Integer>> exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
            while (exprsAndVarIter.hasNext()) {
                final Pair<Integer, Integer> exprAndVarIdx = exprsAndVarIter.next();
                final IOptimizableFuncExpr optFuncExpr = analysisCtx.getMatchedFuncExpr(exprAndVarIdx.first);
                // If the expression is not optimizable for this index, remove it and continue.
                if (!accessMethod.exprIsOptimizable(index, optFuncExpr)) {
                    exprsAndVarIter.remove();
                    continue;
                }
                boolean typeMatch = true;
                // Prune indexes based on field types
                List<IAType> matchedTypes = new ArrayList<>();
                // retrieve types of expressions joined/selected with an indexed field
                for (int j = 0; j < optFuncExpr.getNumLogicalVars(); j++) {
                    if (j != exprAndVarIdx.second) {
                        matchedTypes.add(optFuncExpr.getFieldType(j));
                    }
                }
                if (matchedTypes.size() < 2 && optFuncExpr.getNumLogicalVars() == 1) {
                    matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getConstantExpr(0), context.getMetadataProvider(), typeEnvironment));
                }
                // infer type of logicalExpr based on index keyType
                matchedTypes.add((IAType) ExpressionTypeComputer.INSTANCE.getType(optFuncExpr.getLogicalExpr(exprAndVarIdx.second), null, new IVariableTypeEnvironment() {

                    @Override
                    public Object getVarType(LogicalVariable var) throws AlgebricksException {
                        if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
                            return keyType;
                        }
                        throw new IllegalArgumentException();
                    }

                    @Override
                    public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariables, List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException {
                        if (var.equals(optFuncExpr.getSourceVar(exprAndVarIdx.second))) {
                            return keyType;
                        }
                        throw new IllegalArgumentException();
                    }

                    @Override
                    public void setVarType(LogicalVariable var, Object type) {
                        throw new IllegalArgumentException();
                    }

                    @Override
                    public Object getType(ILogicalExpression expr) throws AlgebricksException {
                        return ExpressionTypeComputer.INSTANCE.getType(expr, null, this);
                    }

                    @Override
                    public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException {
                        throw new IllegalArgumentException();
                    }
                }));
                // for the case when jaccard similarity is measured between ordered & unordered lists
                boolean jaccardSimilarity = optFuncExpr.getFuncExpr().getFunctionIdentifier().getName().startsWith("similarity-jaccard-check");
                // Full-text search consideration: an (un)ordered list of string type can be compatible with string
                // type. i.e. an (un)ordered list can be provided as arguments to a string type field index.
                List<IAType> elementTypes = matchedTypes;
                if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS || optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.FULLTEXT_CONTAINS_WO_OPTION) {
                    for (int j = 0; j < matchedTypes.size(); j++) {
                        if (matchedTypes.get(j).getTypeTag() == ATypeTag.ARRAY || matchedTypes.get(j).getTypeTag() == ATypeTag.MULTISET) {
                            elementTypes.set(j, ((AbstractCollectionType) matchedTypes.get(j)).getItemType());
                        }
                    }
                }
                for (int j = 0; j < matchedTypes.size(); j++) {
                    for (int k = j + 1; k < matchedTypes.size(); k++) {
                        typeMatch &= isMatched(elementTypes.get(j), elementTypes.get(k), jaccardSimilarity);
                    }
                }
                // Check if any field name in the optFuncExpr matches.
                if (optFuncExpr.findFieldName(keyField) != -1) {
                    foundKeyField = typeMatch && optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).hasDataSourceScan();
                    if (foundKeyField) {
                        matchedExpressions.add(exprAndVarIdx.first);
                        numMatchedKeys++;
                        if (lastFieldMatched == i - 1) {
                            lastFieldMatched = i;
                        }
                        break;
                    }
                }
            }
            if (!foundKeyField) {
                allUsed = false;
                // if any expression was matched, remove the non-matched expressions, otherwise the index is unusable
                if (lastFieldMatched >= 0) {
                    exprsAndVarIter = indexExprAndVarEntry.getValue().iterator();
                    while (exprsAndVarIter.hasNext()) {
                        if (!matchedExpressions.contains(exprsAndVarIter.next().first)) {
                            exprsAndVarIter.remove();
                        }
                    }
                }
                break;
            }
        }
        // If the access method requires all exprs to be matched but they are not, remove this candidate.
        if (!allUsed && accessMethod.matchAllIndexExprs()) {
            indexExprAndVarIt.remove();
            continue;
        }
        // A prefix of the index exprs may have been matched.
        if (accessMethod.matchPrefixIndexExprs()) {
            if (lastFieldMatched < 0) {
                indexExprAndVarIt.remove();
                continue;
            }
        }
        analysisCtx.putNumberOfMatchedKeys(index, Integer.valueOf(numMatchedKeys));
    }
}
Also used : ArrayList(java.util.ArrayList) Index(org.apache.asterix.metadata.entities.Index) AString(org.apache.asterix.om.base.AString) AOrderedList(org.apache.asterix.om.base.AOrderedList) ArrayList(java.util.ArrayList) List(java.util.List) Pair(org.apache.hyracks.algebricks.common.utils.Pair) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) HashMap(java.util.HashMap) Map(java.util.Map) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) IAType(org.apache.asterix.om.types.IAType)
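isMatched, used in the type-compatibility loop above, is not shown here. A plausible sketch, assuming it compares type tags through ATypeHierarchy.isSameTypeDomain and relaxes the comparison for the jaccard case; the real method may apply additional rules:

private static boolean isMatched(IAType type1, IAType type2, boolean relaxLists) {
    // Tags in the same type domain (e.g. the numeric tower) match directly.
    if (ATypeHierarchy.isSameTypeDomain(type1.getTypeTag(), type2.getTypeTag(), false)) {
        return true;
    }
    // For similarity-jaccard-check, ordered and unordered lists are interchangeable.
    return relaxLists && (type1.getTypeTag() == ATypeTag.ARRAY || type1.getTypeTag() == ATypeTag.MULTISET)
            && (type2.getTypeTag() == ATypeTag.ARRAY || type2.getTypeTag() == ATypeTag.MULTISET);
}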

Example 30 with Index

use of org.apache.asterix.metadata.entities.Index in project asterixdb by apache.

the class QueryTranslator method handleCreateIndexStatement.

protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
    ProgressState progress = ProgressState.NO_PROGRESS;
    CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
    String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
    String datasetName = stmtCreateIndex.getDatasetName().getValue();
    List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
    String indexName = null;
    JobSpecification spec = null;
    Dataset ds = null;
    // For external datasets
    List<ExternalFile> externalFilesSnapshot = null;
    boolean firstExternalDatasetIndex = false;
    boolean filesIndexReplicated = false;
    Index filesIndex = null;
    boolean datasetLocked = false;
    Index index = null;
    try {
        ds = metadataProvider.findDataset(dataverseName, datasetName);
        if (ds == null) {
            throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
        }
        indexName = stmtCreateIndex.getIndexName().getValue();
        index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
        ARecordType aRecordType = (ARecordType) dt.getDatatype();
        ARecordType metaRecordType = null;
        if (ds.hasMetaPart()) {
            Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
            metaRecordType = (ARecordType) metaDt.getDatatype();
        }
        List<List<String>> indexFields = new ArrayList<>();
        List<IAType> indexFieldTypes = new ArrayList<>();
        int keyIndex = 0;
        for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
            IAType fieldType = null;
            ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
            boolean isOpen = subType.isOpen();
            int i = 0;
            if (fieldExpr.first.size() > 1 && !isOpen) {
                while (i < fieldExpr.first.size() - 1 && !isOpen) {
                    subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
                    i++;
                    isOpen = subType.isOpen();
                }
            }
            if (fieldExpr.second == null) {
                fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
            } else {
                if (!stmtCreateIndex.isEnforced()) {
                    throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
                }
                if (!isOpen) {
                    throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
                }
                if (stmtCreateIndex.hasMetaField()) {
                    throw new AlgebricksException("Typed open index can only be created on the record part");
                }
                Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
                TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
                fieldType = typeMap.get(typeSignature);
            }
            if (fieldType == null) {
                throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
            }
            indexFields.add(fieldExpr.first);
            indexFieldTypes.add(fieldType);
            ++keyIndex;
        }
        ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
        if (index != null) {
            if (stmtCreateIndex.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("An index with this name " + indexName + " already exists.");
            }
        }
        // If a keyword or n-gram index is requested on a dataset with a variable-length primary key, raise an error message and stop.
        if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
            List<List<String>> partitioningKeys = ds.getPrimaryKeys();
            for (List<String> partitioningKey : partitioningKeys) {
                IAType keyType = aRecordType.getSubFieldType(partitioningKey);
                ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
                // If the primary key field is not fixed-length
                if (typeTrait.getFixedLength() < 0) {
                    throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
                }
            }
        }
        if (ds.getDatasetType() == DatasetType.INTERNAL) {
            validateIfResourceIsActiveInFeed(ds);
        } else {
            // Check if the dataset is indexible
            if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
                throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
            }
            // Check if the name of the index is valid
            if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
                throw new AlgebricksException("external dataset index name is invalid");
            }
            // Check if the files index exists
            filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
            firstExternalDatasetIndex = filesIndex == null;
            // Lock external dataset
            ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
            datasetLocked = true;
            if (firstExternalDatasetIndex) {
                // Verify that no one created the files index while we were acquiring the lock
                filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                if (filesIndex != null) {
                    ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
                    firstExternalDatasetIndex = false;
                    ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
                }
            }
            if (firstExternalDatasetIndex) {
                // Get snapshot from External File System
                externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
                // Add an entry for the files index
                filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
                MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
                // Add files to the external files index
                for (ExternalFile file : externalFilesSnapshot) {
                    MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
                }
                // This is the first index for the external dataset, replicate the files index
                spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
                if (spec == null) {
                    throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
                }
                filesIndexReplicated = true;
                JobUtils.runJob(hcc, spec, true);
            }
        }
        // check whether there exists another enforced index on the same field
        if (stmtCreateIndex.isEnforced()) {
            List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
            for (Index existingIndex : indexes) {
                if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
                    throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
                }
            }
        }
        // #. add a new index with PendingAddOp
        index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. prepare to create the index artifact in NC.
        spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
        if (spec == null) {
            throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
        // #. create the index artifact in NC.
        JobUtils.runJob(hcc, spec, true);
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. load data into the index in NC.
        spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        JobUtils.runJob(hcc, spec, true);
        // #. begin new metadataTxn
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. add another new index with PendingNoOp after deleting the index with PendingAddOp
        MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        index.setPendingOp(MetadataUtil.PENDING_NO_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. add the files index back with PendingNoOp after deleting the one with PendingAddOp
        if (firstExternalDatasetIndex) {
            MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
            filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
            MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
            // update transaction timestamp
            ((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        // If files index was replicated for external dataset, it should be cleaned up on NC side
        if (filesIndexReplicated) {
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            try {
                JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
        }
        if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
            // #. execute compensation operations
            // remove the index in NC
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
            if (firstExternalDatasetIndex) {
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop External Files from metadata
                    MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
                }
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop the files index from metadata
                    MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
                }
            }
            // remove the record from the metadata.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                abort(e, e2, mdTxnCtx);
                throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
            }
        }
        throw e;
    } finally {
        metadataProvider.getLocks().unlock();
        if (datasetLocked) {
            ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
        }
    }
}
Also used : ProgressState(org.apache.asterix.common.utils.JobUtils.ProgressState) ArrayList(java.util.ArrayList) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Index(org.apache.asterix.metadata.entities.Index) Datatype(org.apache.asterix.metadata.entities.Datatype) TypeSignature(org.apache.asterix.om.types.TypeSignature) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) ArrayList(java.util.ArrayList) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) CompilationException(org.apache.asterix.common.exceptions.CompilationException) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) TypeExpression(org.apache.asterix.lang.common.expression.TypeExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CreateIndexStatement(org.apache.asterix.lang.common.statement.CreateIndexStatement) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) DatasetNodegroupCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetNodegroupCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)
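KeyFieldTypeUtil.chooseSource, called while resolving the declared key fields above, is not reproduced on this page. A minimal sketch, assuming the usual source-indicator convention where 0 selects the record part and anything else the meta part; verify against the real KeyFieldTypeUtil before relying on it:

public static ARecordType chooseSource(List<Integer> keySourceIndicators, int keyIndex, ARecordType recordType, ARecordType metaRecordType) {
    // Indicator 0 means the key comes from the record; otherwise from the meta part.
    return keySourceIndicators.get(keyIndex) == 0 ? recordType : metaRecordType;
}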

Aggregations

Index (org.apache.asterix.metadata.entities.Index): 53
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 26
Dataset (org.apache.asterix.metadata.entities.Dataset): 25
ArrayList (java.util.ArrayList): 24
MetadataException (org.apache.asterix.metadata.MetadataException): 20
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 16
ARecordType (org.apache.asterix.om.types.ARecordType): 15
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 15
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 14
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable): 14
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 13
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory): 13
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory): 13
AsterixException (org.apache.asterix.common.exceptions.AsterixException): 12
IAType (org.apache.asterix.om.types.IAType): 12
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex): 12
IOException (java.io.IOException): 11
CompilationException (org.apache.asterix.common.exceptions.CompilationException): 11
List (java.util.List): 10
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 10