Examples with InternalDatasetDetails - org.apache.asterix.metadata.entities.InternalDatasetDetails

Example 6 with InternalDatasetDetails

use of org.apache.asterix.metadata.entities.InternalDatasetDetails in project asterixdb by apache.

the class DatasetTupleTranslator method createDatasetFromARecord.

protected Dataset createDatasetFromARecord(ARecord datasetRecord) throws HyracksDataException {
    String dataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATAVERSENAME_FIELD_INDEX)).getStringValue();
    String datasetName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETNAME_FIELD_INDEX)).getStringValue();
    String typeName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPENAME_FIELD_INDEX)).getStringValue();
    String typeDataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPEDATAVERSENAME_FIELD_INDEX)).getStringValue();
    DatasetType datasetType = DatasetType.valueOf(((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETTYPE_FIELD_INDEX)).getStringValue());
    IDatasetDetails datasetDetails = null;
    int datasetId = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETID_FIELD_INDEX)).getIntegerValue();
    int pendingOp = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_PENDINGOP_FIELD_INDEX)).getIntegerValue();
    String nodeGroupName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_GROUPNAME_FIELD_INDEX)).getStringValue();
    String compactionPolicy = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_FIELD_INDEX)).getStringValue();
    IACursor cursor = ((AOrderedList) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_PROPERTIES_FIELD_INDEX)).getCursor();
    Map<String, String> compactionPolicyProperties = new LinkedHashMap<>();
    String key;
    String value;
    while (cursor.next()) {
        ARecord field = (ARecord) cursor.get();
        key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
        value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
        compactionPolicyProperties.put(key, value);
    }
    switch(datasetType) {
        case INTERNAL:
            {
                ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_INTERNALDETAILS_FIELD_INDEX);
                FileStructure fileStructure = FileStructure.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_FILESTRUCTURE_FIELD_INDEX)).getStringValue());
                PartitioningStrategy partitioningStrategy = PartitioningStrategy.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONSTRATEGY_FIELD_INDEX)).getStringValue());
                cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONKEY_FIELD_INDEX)).getCursor();
                List<List<String>> partitioningKey = new ArrayList<>();
                List<IAType> partitioningKeyType = new ArrayList<>();
                AOrderedList fieldNameList;
                while (cursor.next()) {
                    fieldNameList = (AOrderedList) cursor.get();
                    IACursor nestedFieldNameCursor = (fieldNameList.getCursor());
                    List<String> nestedFieldName = new ArrayList<>();
                    while (nestedFieldNameCursor.next()) {
                        nestedFieldName.add(((AString) nestedFieldNameCursor.get()).getStringValue());
                    }
                    partitioningKey.add(nestedFieldName);
                    partitioningKeyType.add(BuiltinType.ASTRING);
                }
                boolean autogenerated = ((ABoolean) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_AUTOGENERATED_FIELD_INDEX)).getBoolean();
                // Check if there is a filter field.
                List<String> filterField = null;
                int filterFieldPos = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.FILTER_FIELD_NAME);
                if (filterFieldPos >= 0) {
                    filterField = new ArrayList<>();
                    cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(filterFieldPos)).getCursor();
                    while (cursor.next()) {
                        filterField.add(((AString) cursor.get()).getStringValue());
                    }
                }
                // Read a field-source-indicator field.
                List<Integer> keyFieldSourceIndicator = new ArrayList<>();
                int keyFieldSourceIndicatorIndex = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.KEY_FILD_SOURCE_INDICATOR_FIELD_NAME);
                if (keyFieldSourceIndicatorIndex >= 0) {
                    cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(keyFieldSourceIndicatorIndex)).getCursor();
                    while (cursor.next()) {
                        keyFieldSourceIndicator.add((int) ((AInt8) cursor.get()).getByteValue());
                    }
                } else {
                    for (int index = 0; index < partitioningKey.size(); ++index) {
                        keyFieldSourceIndicator.add(0);
                    }
                }
                // Temporary dataset only lives in the compiler therefore the temp field is false.
                //  DatasetTupleTranslator always read from the metadata node, so the temp flag should be always false.
                datasetDetails = new InternalDatasetDetails(fileStructure, partitioningStrategy, partitioningKey, partitioningKey, keyFieldSourceIndicator, partitioningKeyType, autogenerated, filterField, false);
                break;
            }
        case EXTERNAL:
            ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_EXTERNALDETAILS_FIELD_INDEX);
            String adapter = ((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_DATASOURCE_ADAPTER_FIELD_INDEX)).getStringValue();
            cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_PROPERTIES_FIELD_INDEX)).getCursor();
            Map<String, String> properties = new HashMap<>();
            while (cursor.next()) {
                ARecord field = (ARecord) cursor.get();
                key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
                value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
                properties.put(key, value);
            }
            // Timestamp
            Date timestamp = new Date((((ADateTime) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_LAST_REFRESH_TIME_FIELD_INDEX))).getChrononTime());
            // State
            TransactionState state = TransactionState.values()[((AInt32) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_TRANSACTION_STATE_FIELD_INDEX)).getIntegerValue()];
            datasetDetails = new ExternalDatasetDetails(adapter, properties, timestamp, state);
    }
    Map<String, String> hints = getDatasetHints(datasetRecord);
    String metaTypeDataverseName = null;
    String metaTypeName = null;
    int metaTypeDataverseNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METADATA_DATAVERSE);
    if (metaTypeDataverseNameIndex >= 0) {
        metaTypeDataverseName = ((AString) datasetRecord.getValueByPos(metaTypeDataverseNameIndex)).getStringValue();
        int metaTypeNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METATYPE_NAME);
        metaTypeName = ((AString) datasetRecord.getValueByPos(metaTypeNameIndex)).getStringValue();
    }
    // Read the rebalance count if there is one.
    int rebalanceCountIndex = datasetRecord.getType().getFieldIndex(REBALANCE_ID_FIELD_NAME);
    long rebalanceCount = rebalanceCountIndex >= 0 ? ((AInt64) datasetRecord.getValueByPos(rebalanceCountIndex)).getLongValue() : 0;
    return new Dataset(dataverseName, datasetName, typeDataverseName, typeName, metaTypeDataverseName, metaTypeName, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails, hints, datasetType, datasetId, pendingOp, rebalanceCount);
}

Also used : TransactionState(org.apache.asterix.common.config.DatasetConfig.TransactionState) FileStructure(org.apache.asterix.metadata.entities.InternalDatasetDetails.FileStructure) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Dataset(org.apache.asterix.metadata.entities.Dataset) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ArrayList(java.util.ArrayList) ADateTime(org.apache.asterix.om.base.ADateTime) DatasetType(org.apache.asterix.common.config.DatasetConfig.DatasetType) AMutableString(org.apache.asterix.om.base.AMutableString) AString(org.apache.asterix.om.base.AString) IACursor(org.apache.asterix.om.base.IACursor) IDatasetDetails(org.apache.asterix.metadata.IDatasetDetails) AInt32(org.apache.asterix.om.base.AInt32) Date(java.util.Date) LinkedHashMap(java.util.LinkedHashMap) ARecord(org.apache.asterix.om.base.ARecord) AOrderedList(org.apache.asterix.om.base.AOrderedList) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) PartitioningStrategy(org.apache.asterix.metadata.entities.InternalDatasetDetails.PartitioningStrategy) List(java.util.List) AOrderedList(org.apache.asterix.om.base.AOrderedList) AUnorderedList(org.apache.asterix.om.base.AUnorderedList) ArrayList(java.util.ArrayList) AString(org.apache.asterix.om.base.AString)

Example 7 with InternalDatasetDetails

use of org.apache.asterix.metadata.entities.InternalDatasetDetails in project asterixdb by apache.

the class DatasetDataSource method initInternalDataset.

private void initInternalDataset(IAType itemType, IAType metaItemType, IDatasetDetails datasetDetails) throws AlgebricksException {
    InternalDatasetDetails internalDatasetDetails = (InternalDatasetDetails) datasetDetails;
    ARecordType recordType = (ARecordType) itemType;
    ARecordType metaRecordType = (ARecordType) metaItemType;
    List<IAType> partitioningKeyTypes = KeyFieldTypeUtil.getPartitioningKeyTypes(internalDatasetDetails, recordType, metaRecordType);
    int n = partitioningKeyTypes.size();
    schemaTypes = metaItemType == null ? new IAType[n + 1] : new IAType[n + 2];
    for (int keyIndex = 0; keyIndex < n; ++keyIndex) {
        schemaTypes[keyIndex] = partitioningKeyTypes.get(keyIndex);
    }
    schemaTypes[n] = itemType;
    if (metaItemType != null) {
        schemaTypes[n + 1] = metaItemType;
    }
}

Also used : InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ARecordType(org.apache.asterix.om.types.ARecordType) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IAType(org.apache.asterix.om.types.IAType)

Example 8 with InternalDatasetDetails

use of org.apache.asterix.metadata.entities.InternalDatasetDetails in project asterixdb by apache.

the class IntroduceSecondaryIndexInsertDeleteRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
    if (op0.getOperatorTag() != LogicalOperatorTag.DELEGATE_OPERATOR && op0.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    if (op0.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator eOp = (DelegateOperator) op0;
        if (!(eOp.getDelegate() instanceof CommitOperator)) {
            return false;
        }
    }
    AbstractLogicalOperator op1 = (AbstractLogicalOperator) op0.getInputs().get(0).getValue();
    if (op1.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    /** find the record variable */
    InsertDeleteUpsertOperator primaryIndexModificationOp = (InsertDeleteUpsertOperator) op0.getInputs().get(0).getValue();
    boolean isBulkload = primaryIndexModificationOp.isBulkload();
    ILogicalExpression newRecordExpr = primaryIndexModificationOp.getPayloadExpression().getValue();
    List<Mutable<ILogicalExpression>> newMetaExprs = primaryIndexModificationOp.getAdditionalNonFilteringExpressions();
    LogicalVariable newRecordVar;
    LogicalVariable newMetaVar = null;
    /**
         * inputOp is the assign operator which extracts primary keys from the input
         * variables (record or meta)
         */
    AbstractLogicalOperator inputOp = (AbstractLogicalOperator) primaryIndexModificationOp.getInputs().get(0).getValue();
    newRecordVar = getRecordVar(context, inputOp, newRecordExpr, 0);
    if (newMetaExprs != null && !newMetaExprs.isEmpty()) {
        if (newMetaExprs.size() > 1) {
            throw new AlgebricksException("Number of meta records can't be more than 1. Number of meta records found = " + newMetaExprs.size());
        }
        newMetaVar = getRecordVar(context, inputOp, newMetaExprs.get(0).getValue(), 1);
    }
    /*
         * At this point, we have the record variable and the insert/delete/upsert operator
         * Note: We have two operators:
         * 1. An InsertDeleteOperator (primary)
         * 2. An IndexInsertDeleteOperator (secondary)
         * The current primaryIndexModificationOp is of the first type
         */
    DataSource datasetSource = (DataSource) primaryIndexModificationOp.getDataSource();
    MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
    String dataverseName = datasetSource.getId().getDataverseName();
    String datasetName = datasetSource.getId().getDatasourceName();
    Dataset dataset = mp.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AlgebricksException("Unknown dataset " + datasetName + " in dataverse " + dataverseName);
    }
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return false;
    }
    // Create operators for secondary index insert / delete.
    String itemTypeName = dataset.getItemTypeName();
    IAType itemType = mp.findType(dataset.getItemTypeDataverseName(), itemTypeName);
    if (itemType.getTypeTag() != ATypeTag.OBJECT) {
        throw new AlgebricksException("Only record types can be indexed.");
    }
    ARecordType recType = (ARecordType) itemType;
    // meta type
    ARecordType metaType = null;
    if (dataset.hasMetaPart()) {
        metaType = (ARecordType) mp.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    }
    List<Index> indexes = mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName());
    // Set the top operator pointer to the primary IndexInsertDeleteOperator
    ILogicalOperator currentTop = primaryIndexModificationOp;
    boolean hasSecondaryIndex = false;
    // Put an n-gram or a keyword index in the later stage of index-update,
    // since TokenizeOperator needs to be involved.
    Collections.sort(indexes, (o1, o2) -> o1.getIndexType().ordinal() - o2.getIndexType().ordinal());
    // At this point, we have the data type info, and the indexes info as well
    int secondaryIndexTotalCnt = indexes.size() - 1;
    if (secondaryIndexTotalCnt > 0) {
        op0.getInputs().clear();
    } else {
        return false;
    }
    // Initialize inputs to the SINK operator Op0 (The SINK) is now without input
    // Prepare filtering field information (This is the filter created using the "filter with" key word in the
    // create dataset ddl)
    List<String> filteringFields = ((InternalDatasetDetails) dataset.getDatasetDetails()).getFilterField();
    List<LogicalVariable> filteringVars;
    List<Mutable<ILogicalExpression>> filteringExpressions = null;
    if (filteringFields != null) {
        // The filter field var already exists. we can simply get it from the insert op
        filteringVars = new ArrayList<>();
        filteringExpressions = new ArrayList<>();
        for (Mutable<ILogicalExpression> filteringExpression : primaryIndexModificationOp.getAdditionalFilteringExpressions()) {
            filteringExpression.getValue().getUsedVariables(filteringVars);
            for (LogicalVariable var : filteringVars) {
                filteringExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(var)));
            }
        }
    }
    // Replicate Operator is applied only when doing the bulk-load.
    ReplicateOperator replicateOp = null;
    if (secondaryIndexTotalCnt > 1 && primaryIndexModificationOp.isBulkload()) {
        // Split the logical plan into "each secondary index update branch"
        // to replicate each <PK,OBJECT> pair.
        replicateOp = new ReplicateOperator(secondaryIndexTotalCnt);
        replicateOp.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
        replicateOp.setExecutionMode(ExecutionMode.PARTITIONED);
        context.computeAndSetTypeEnvironmentForOperator(replicateOp);
        currentTop = replicateOp;
    }
    /*
         * The two maps are used to store variables to which [casted] field access is assigned.
         * One for the beforeOp record and the other for the new record.
         * There are two uses for these maps:
         * 1. used for shared fields in indexes with overlapping keys.
         * 2. used for setting variables of secondary keys for each secondary index operator.
         */
    Map<IndexFieldId, LogicalVariable> fieldVarsForBeforeOperation = new HashMap<>();
    Map<IndexFieldId, LogicalVariable> fieldVarsForNewRecord = new HashMap<>();
    /*
         * if the index is enforcing field types (For open indexes), We add a cast
         * operator to ensure type safety
         */
    try {
        if (primaryIndexModificationOp.getOperation() == Kind.INSERT || primaryIndexModificationOp.getOperation() == Kind.UPSERT || /* Actually, delete should not be here but it is now until issue
                     * https://issues.apache.org/jira/browse/ASTERIXDB-1507
                     * is solved
                     */
        primaryIndexModificationOp.getOperation() == Kind.DELETE) {
            injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForNewRecord, recType, metaType, newRecordVar, newMetaVar, primaryIndexModificationOp, false);
            if (replicateOp != null) {
                context.computeAndSetTypeEnvironmentForOperator(replicateOp);
            }
        }
        if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) /* Actually, delete should be here but it is not until issue
             * https://issues.apache.org/jira/browse/ASTERIXDB-1507
             * is solved
             */
        {
            List<LogicalVariable> beforeOpMetaVars = primaryIndexModificationOp.getBeforeOpAdditionalNonFilteringVars();
            LogicalVariable beforeOpMetaVar = beforeOpMetaVars == null ? null : beforeOpMetaVars.get(0);
            currentTop = injectFieldAccessesForIndexes(context, dataset, indexes, fieldVarsForBeforeOperation, recType, metaType, primaryIndexModificationOp.getBeforeOpRecordVar(), beforeOpMetaVar, currentTop, true);
        }
    } catch (AsterixException e) {
        throw new AlgebricksException(e);
    }
    // At first, op1 is the index insert op insertOp
    for (Index index : indexes) {
        if (!index.isSecondaryIndex()) {
            continue;
        }
        hasSecondaryIndex = true;
        // Get the secondary fields names and types
        List<List<String>> secondaryKeyFields = index.getKeyFieldNames();
        List<LogicalVariable> secondaryKeyVars = new ArrayList<>();
        List<Mutable<ILogicalExpression>> secondaryExpressions = new ArrayList<>();
        List<Mutable<ILogicalExpression>> beforeOpSecondaryExpressions = new ArrayList<>();
        ILogicalOperator replicateOutput;
        for (int i = 0; i < secondaryKeyFields.size(); i++) {
            IndexFieldId indexFieldId = new IndexFieldId(index.getKeyFieldSourceIndicators().get(i), secondaryKeyFields.get(i));
            LogicalVariable skVar = fieldVarsForNewRecord.get(indexFieldId);
            secondaryKeyVars.add(skVar);
            secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(skVar)));
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(fieldVarsForBeforeOperation.get(indexFieldId))));
            }
        }
        IndexInsertDeleteUpsertOperator indexUpdate;
        if (index.getIndexType() != IndexType.RTREE) {
            // Create an expression per key
            Mutable<ILogicalExpression> filterExpression = (primaryIndexModificationOp.getOperation() == Kind.UPSERT) ? null : createFilterExpression(secondaryKeyVars, context.getOutputTypeEnvironment(currentTop), index.isEnforcingKeyFileds());
            DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
            // and index type is keyword or n-gram.
            if (index.getIndexType() != IndexType.BTREE && primaryIndexModificationOp.isBulkload()) {
                // Note: Bulk load case, we don't need to take care of it for upsert operation
                // Check whether the index is length-partitioned or not.
                // If partitioned, [input variables to TokenizeOperator,
                // token, number of token] pairs will be generated and
                // fed into the IndexInsertDeleteOperator.
                // If not, [input variables, token] pairs will be generated
                // and fed into the IndexInsertDeleteOperator.
                // Input variables are passed since TokenizeOperator is not an
                // filtering operator.
                boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
                // Create a new logical variable - token
                List<LogicalVariable> tokenizeKeyVars = new ArrayList<>();
                List<Mutable<ILogicalExpression>> tokenizeKeyExprs = new ArrayList<>();
                LogicalVariable tokenVar = context.newVar();
                tokenizeKeyVars.add(tokenVar);
                tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(tokenVar)));
                // Check the field type of the secondary key.
                IAType secondaryKeyType;
                Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
                secondaryKeyType = keyPairType.first;
                List<Object> varTypes = new ArrayList<>();
                varTypes.add(NonTaggedFormatUtil.getTokenType(secondaryKeyType));
                // The type is short, and this does not contain type info.
                if (isPartitioned) {
                    LogicalVariable lengthVar = context.newVar();
                    tokenizeKeyVars.add(lengthVar);
                    tokenizeKeyExprs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(lengthVar)));
                    varTypes.add(BuiltinType.SHORTWITHOUTTYPEINFO);
                }
                // TokenizeOperator to tokenize [SK, PK] pairs
                TokenizeOperator tokenUpdate = new TokenizeOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, tokenizeKeyVars, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), isPartitioned, varTypes);
                tokenUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
                context.computeAndSetTypeEnvironmentForOperator(tokenUpdate);
                replicateOutput = tokenUpdate;
                indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), tokenizeKeyExprs, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
                indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(tokenUpdate));
            } else {
                // When TokenizeOperator is not needed
                indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
                indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
                replicateOutput = indexUpdate;
                // We add the necessary expressions for upsert
                if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                    indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
                    if (filteringFields != null) {
                        indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
                    }
                }
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
            }
        } else {
            // Get type, dimensions and number of keys
            Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), secondaryKeyFields.get(0), recType);
            IAType spatialType = keyPairType.first;
            boolean isPointMBR = spatialType.getTypeTag() == ATypeTag.POINT || spatialType.getTypeTag() == ATypeTag.POINT3D;
            int dimension = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
            int numKeys = (isPointMBR && isBulkload) ? dimension : dimension * 2;
            // Get variables and expressions
            List<LogicalVariable> keyVarList = new ArrayList<>();
            List<Mutable<ILogicalExpression>> keyExprList = new ArrayList<>();
            for (int i = 0; i < numKeys; i++) {
                LogicalVariable keyVar = context.newVar();
                keyVarList.add(keyVar);
                AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVars.get(0))));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
                createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
                keyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
            }
            secondaryExpressions.clear();
            for (LogicalVariable secondaryKeyVar : keyVarList) {
                secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
            }
            if (isPointMBR && isBulkload) {
                //createFieldPermutationForBulkLoadOp(int) for more details.
                for (LogicalVariable secondaryKeyVar : keyVarList) {
                    secondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
                }
            }
            AssignOperator assignCoordinates = new AssignOperator(keyVarList, keyExprList);
            assignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
            context.computeAndSetTypeEnvironmentForOperator(assignCoordinates);
            replicateOutput = assignCoordinates;
            Mutable<ILogicalExpression> filterExpression = null;
            AssignOperator originalAssignCoordinates = null;
            // We do something similar for beforeOp key if the operation is an upsert
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                List<LogicalVariable> originalKeyVarList = new ArrayList<>();
                List<Mutable<ILogicalExpression>> originalKeyExprList = new ArrayList<>();
                // we don't do any filtering since nulls are expected here and there
                for (int i = 0; i < numKeys; i++) {
                    LogicalVariable keyVar = context.newVar();
                    originalKeyVarList.add(keyVar);
                    AbstractFunctionCallExpression createMBR = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
                    createMBR.getArguments().add(beforeOpSecondaryExpressions.get(0));
                    createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(dimension)))));
                    createMBR.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(i)))));
                    originalKeyExprList.add(new MutableObject<ILogicalExpression>(createMBR));
                }
                beforeOpSecondaryExpressions.clear();
                for (LogicalVariable secondaryKeyVar : originalKeyVarList) {
                    beforeOpSecondaryExpressions.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(secondaryKeyVar)));
                }
                originalAssignCoordinates = new AssignOperator(originalKeyVarList, originalKeyExprList);
                originalAssignCoordinates.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
                context.computeAndSetTypeEnvironmentForOperator(originalAssignCoordinates);
            } else {
                // We must enforce the filter if the originating spatial type is
                // nullable.
                boolean forceFilter = keyPairType.second;
                filterExpression = createFilterExpression(keyVarList, context.getOutputTypeEnvironment(assignCoordinates), forceFilter);
            }
            DataSourceIndex dataSourceIndex = new DataSourceIndex(index, dataverseName, datasetName, mp);
            indexUpdate = new IndexInsertDeleteUpsertOperator(dataSourceIndex, primaryIndexModificationOp.getPrimaryKeyExpressions(), secondaryExpressions, filterExpression, primaryIndexModificationOp.getOperation(), primaryIndexModificationOp.isBulkload(), primaryIndexModificationOp.getAdditionalNonFilteringExpressions() == null ? 0 : primaryIndexModificationOp.getAdditionalNonFilteringExpressions().size());
            indexUpdate.setAdditionalFilteringExpressions(filteringExpressions);
            if (primaryIndexModificationOp.getOperation() == Kind.UPSERT) {
                // set before op secondary key expressions
                if (filteringFields != null) {
                    indexUpdate.setBeforeOpAdditionalFilteringExpression(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(primaryIndexModificationOp.getBeforeOpFilterVar())));
                }
                // set filtering expressions
                indexUpdate.setBeforeOpSecondaryKeyExprs(beforeOpSecondaryExpressions);
                // assign --> assign beforeOp values --> secondary index upsert
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(originalAssignCoordinates));
            } else {
                indexUpdate.getInputs().add(new MutableObject<ILogicalOperator>(assignCoordinates));
            }
        }
        context.computeAndSetTypeEnvironmentForOperator(indexUpdate);
        if (!primaryIndexModificationOp.isBulkload() || secondaryIndexTotalCnt == 1) {
            currentTop = indexUpdate;
        } else {
            replicateOp.getOutputs().add(new MutableObject<>(replicateOutput));
        }
        if (primaryIndexModificationOp.isBulkload()) {
            // For bulk load, we connect all fanned out insert operator to a single SINK operator
            op0.getInputs().add(new MutableObject<ILogicalOperator>(indexUpdate));
        }
    }
    if (!hasSecondaryIndex) {
        return false;
    }
    if (!primaryIndexModificationOp.isBulkload()) {
        // If this is an upsert, we need to
        // Remove the current input to the SINK operator (It is actually already removed above)
        op0.getInputs().clear();
        // Connect the last index update to the SINK
        op0.getInputs().add(new MutableObject<ILogicalOperator>(currentTop));
    }
    return true;
}

Also used : HashMap(java.util.HashMap) ConstantExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression) ArrayList(java.util.ArrayList) Index(org.apache.asterix.metadata.entities.Index) DataSourceIndex(org.apache.asterix.metadata.declared.DataSourceIndex) AString(org.apache.asterix.om.base.AString) AsterixException(org.apache.asterix.common.exceptions.AsterixException) TokenizeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.TokenizeOperator) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) DelegateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator) List(java.util.List) AOrderedList(org.apache.asterix.om.base.AOrderedList) ArrayList(java.util.ArrayList) CommitOperator(org.apache.asterix.algebra.operators.CommitOperator) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression) ReplicateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) DataSource(org.apache.asterix.metadata.declared.DataSource) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) MutableObject(org.apache.commons.lang3.mutable.MutableObject) IAObject(org.apache.asterix.om.base.IAObject) IAType(org.apache.asterix.om.types.IAType) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) DataSourceIndex(org.apache.asterix.metadata.declared.DataSourceIndex) IndexInsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteUpsertOperator) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) AInt32(org.apache.asterix.om.base.AInt32) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) IndexInsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteUpsertOperator) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) ARecordType(org.apache.asterix.om.types.ARecordType)

Example 9 with InternalDatasetDetails

use of org.apache.asterix.metadata.entities.InternalDatasetDetails in project asterixdb by apache.

the class RemoveUnusedOneToOneEquiJoinRule method fillPKVars.

private void fillPKVars(DataSourceScanOperator dataScan, List<LogicalVariable> pkVars) {
    pkVars.clear();
    DatasetDataSource datasetDataSource = (DatasetDataSource) dataScan.getDataSource();
    pkVars.clear();
    if (datasetDataSource.getDataset().getDatasetDetails() instanceof InternalDatasetDetails) {
        int numPKs = datasetDataSource.getDataset().getPrimaryKeys().size();
        for (int i = 0; i < numPKs; i++) {
            pkVars.add(dataScan.getVariables().get(i));
        }
    }
}

Also used : InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource)

Example 10 with InternalDatasetDetails

use of org.apache.asterix.metadata.entities.InternalDatasetDetails in project asterixdb by apache.

the class IntroduceAutogenerateIDRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    // match: commit OR distribute-result OR SINK - ... followed by:
    // [insert to internal dataset with autogenerated id] - assign - project
    // produce: insert - assign - assign* - project
    // **
    // OR [insert to internal dataset with autogenerated id] - assign - [datasource scan]
    // produce insert - assign - assign* - datasource scan
    AbstractLogicalOperator currentOp = (AbstractLogicalOperator) opRef.getValue();
    if (currentOp.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR) {
        DelegateOperator dOp = (DelegateOperator) currentOp;
        if (!(dOp.getDelegate() instanceof CommitOperator)) {
            return false;
        } else if (!((CommitOperator) dOp.getDelegate()).isSink()) {
            return false;
        }
    } else if (currentOp.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT && currentOp.getOperatorTag() != LogicalOperatorTag.SINK) {
        return false;
    }
    ArrayDeque<AbstractLogicalOperator> opStack = new ArrayDeque<>();
    opStack.push(currentOp);
    while (currentOp.getInputs().size() == 1) {
        currentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
        if (currentOp.getOperatorTag() == LogicalOperatorTag.INSERT_DELETE_UPSERT) {
            break;
        }
        opStack.push(currentOp);
    }
    if (currentOp.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE_UPSERT) {
        return false;
    }
    InsertDeleteUpsertOperator insertOp = (InsertDeleteUpsertOperator) currentOp;
    if (insertOp.getOperation() != Kind.INSERT && insertOp.getOperation() != Kind.UPSERT) {
        return false;
    }
    DatasetDataSource dds = (DatasetDataSource) insertOp.getDataSource();
    boolean autogenerated = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).isAutogenerated();
    if (!autogenerated) {
        return false;
    }
    if (((DataSource) insertOp.getDataSource()).getDatasourceType() != Type.INTERNAL_DATASET) {
        return false;
    }
    AbstractLogicalOperator parentOp = (AbstractLogicalOperator) currentOp.getInputs().get(0).getValue();
    if (parentOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
        return false;
    }
    AssignOperator assignOp = (AssignOperator) parentOp;
    LogicalVariable inputRecord;
    //TODO: bug here. will not work for internal datasets with filters since the pattern becomes 
    //[project-assign-assign-insert]
    AbstractLogicalOperator grandparentOp = (AbstractLogicalOperator) parentOp.getInputs().get(0).getValue();
    if (grandparentOp.getOperatorTag() == LogicalOperatorTag.PROJECT) {
        ProjectOperator projectOp = (ProjectOperator) grandparentOp;
        inputRecord = projectOp.getVariables().get(0);
    } else if (grandparentOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
        DataSourceScanOperator dssOp = (DataSourceScanOperator) grandparentOp;
        inputRecord = dssOp.getVariables().get(0);
    } else {
        return false;
    }
    List<String> pkFieldName = ((InternalDatasetDetails) dds.getDataset().getDatasetDetails()).getPrimaryKey().get(0);
    ILogicalExpression rec0 = new VariableReferenceExpression(inputRecord);
    ILogicalExpression rec1 = createPrimaryKeyRecordExpression(pkFieldName);
    ILogicalExpression mergedRec = createRecordMergeFunction(rec0, rec1);
    ILogicalExpression nonNullMergedRec = createNotNullFunction(mergedRec);
    LogicalVariable v = context.newVar();
    AssignOperator newAssign = new AssignOperator(v, new MutableObject<ILogicalExpression>(nonNullMergedRec));
    newAssign.getInputs().add(new MutableObject<ILogicalOperator>(grandparentOp));
    assignOp.getInputs().set(0, new MutableObject<ILogicalOperator>(newAssign));
    VariableUtilities.substituteVariables(assignOp, inputRecord, v, context);
    VariableUtilities.substituteVariables(insertOp, inputRecord, v, context);
    context.computeAndSetTypeEnvironmentForOperator(newAssign);
    context.computeAndSetTypeEnvironmentForOperator(assignOp);
    context.computeAndSetTypeEnvironmentForOperator(insertOp);
    ;
    for (AbstractLogicalOperator op : opStack) {
        VariableUtilities.substituteVariables(op, inputRecord, v, context);
        context.computeAndSetTypeEnvironmentForOperator(op);
    }
    return true;
}

Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) ProjectOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) DatasetDataSource(org.apache.asterix.metadata.declared.DatasetDataSource) AString(org.apache.asterix.om.base.AString) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) ArrayDeque(java.util.ArrayDeque) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) InsertDeleteUpsertOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteUpsertOperator) DataSourceScanOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator) DelegateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) CommitOperator(org.apache.asterix.algebra.operators.CommitOperator)

Aggregations

InternalDatasetDetails (org.apache.asterix.metadata.entities.InternalDatasetDetails)14 Dataset (org.apache.asterix.metadata.entities.Dataset)8 ArrayList (java.util.ArrayList)7 List (java.util.List)7 HashMap (java.util.HashMap)6 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)5 AString (org.apache.asterix.om.base.AString)4 ARecordType (org.apache.asterix.om.types.ARecordType)4 IAType (org.apache.asterix.om.types.IAType)4 Test (org.junit.Test)4 IDatasetDetails (org.apache.asterix.metadata.IDatasetDetails)3 DatasetDataSource (org.apache.asterix.metadata.declared.DatasetDataSource)3 Index (org.apache.asterix.metadata.entities.Index)3 AInt32 (org.apache.asterix.om.base.AInt32)3 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)3 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)3 ScalarFunctionCallExpression (org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression)3 VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression)3 Date (java.util.Date)2 CommitOperator (org.apache.asterix.algebra.operators.CommitOperator)2