
Example 6 with ExternalDatasetDetails

use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.

the class DatasetTupleTranslator method createDatasetFromARecord.

protected Dataset createDatasetFromARecord(ARecord datasetRecord) throws HyracksDataException {
    String dataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATAVERSENAME_FIELD_INDEX)).getStringValue();
    String datasetName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETNAME_FIELD_INDEX)).getStringValue();
    String typeName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPENAME_FIELD_INDEX)).getStringValue();
    String typeDataverseName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATATYPEDATAVERSENAME_FIELD_INDEX)).getStringValue();
    DatasetType datasetType = DatasetType.valueOf(((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETTYPE_FIELD_INDEX)).getStringValue());
    IDatasetDetails datasetDetails = null;
    int datasetId = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_DATASETID_FIELD_INDEX)).getIntegerValue();
    int pendingOp = ((AInt32) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_PENDINGOP_FIELD_INDEX)).getIntegerValue();
    String nodeGroupName = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_GROUPNAME_FIELD_INDEX)).getStringValue();
    String compactionPolicy = ((AString) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_FIELD_INDEX)).getStringValue();
    IACursor cursor = ((AOrderedList) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_COMPACTION_POLICY_PROPERTIES_FIELD_INDEX)).getCursor();
    Map<String, String> compactionPolicyProperties = new LinkedHashMap<>();
    String key;
    String value;
    while (cursor.next()) {
        ARecord field = (ARecord) cursor.get();
        key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
        value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
        compactionPolicyProperties.put(key, value);
    }
    switch(datasetType) {
        case INTERNAL:
            {
                ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_INTERNALDETAILS_FIELD_INDEX);
                FileStructure fileStructure = FileStructure.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_FILESTRUCTURE_FIELD_INDEX)).getStringValue());
                PartitioningStrategy partitioningStrategy = PartitioningStrategy.valueOf(((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONSTRATEGY_FIELD_INDEX)).getStringValue());
                cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_PARTITIONKEY_FIELD_INDEX)).getCursor();
                List<List<String>> partitioningKey = new ArrayList<>();
                List<IAType> partitioningKeyType = new ArrayList<>();
                AOrderedList fieldNameList;
                while (cursor.next()) {
                    fieldNameList = (AOrderedList) cursor.get();
                    IACursor nestedFieldNameCursor = fieldNameList.getCursor();
                    List<String> nestedFieldName = new ArrayList<>();
                    while (nestedFieldNameCursor.next()) {
                        nestedFieldName.add(((AString) nestedFieldNameCursor.get()).getStringValue());
                    }
                    partitioningKey.add(nestedFieldName);
                    partitioningKeyType.add(BuiltinType.ASTRING);
                }
                boolean autogenerated = ((ABoolean) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.INTERNAL_DETAILS_ARECORD_AUTOGENERATED_FIELD_INDEX)).getBoolean();
                // Check if there is a filter field.
                List<String> filterField = null;
                int filterFieldPos = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.FILTER_FIELD_NAME);
                if (filterFieldPos >= 0) {
                    filterField = new ArrayList<>();
                    cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(filterFieldPos)).getCursor();
                    while (cursor.next()) {
                        filterField.add(((AString) cursor.get()).getStringValue());
                    }
                }
                // Read a field-source-indicator field.
                List<Integer> keyFieldSourceIndicator = new ArrayList<>();
                int keyFieldSourceIndicatorIndex = datasetDetailsRecord.getType().getFieldIndex(InternalDatasetDetails.KEY_FILD_SOURCE_INDICATOR_FIELD_NAME);
                if (keyFieldSourceIndicatorIndex >= 0) {
                    cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(keyFieldSourceIndicatorIndex)).getCursor();
                    while (cursor.next()) {
                        keyFieldSourceIndicator.add((int) ((AInt8) cursor.get()).getByteValue());
                    }
                } else {
                    for (int index = 0; index < partitioningKey.size(); ++index) {
                        keyFieldSourceIndicator.add(0);
                    }
                }
                // A temporary dataset only lives in the compiler, so the temp field is false here.
                // DatasetTupleTranslator always reads from the metadata node, where the temp flag is always false.
                datasetDetails = new InternalDatasetDetails(fileStructure, partitioningStrategy, partitioningKey, partitioningKey, keyFieldSourceIndicator, partitioningKeyType, autogenerated, filterField, false);
                break;
            }
        case EXTERNAL:
            ARecord datasetDetailsRecord = (ARecord) datasetRecord.getValueByPos(MetadataRecordTypes.DATASET_ARECORD_EXTERNALDETAILS_FIELD_INDEX);
            String adapter = ((AString) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_DATASOURCE_ADAPTER_FIELD_INDEX)).getStringValue();
            cursor = ((AOrderedList) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_PROPERTIES_FIELD_INDEX)).getCursor();
            Map<String, String> properties = new HashMap<>();
            while (cursor.next()) {
                ARecord field = (ARecord) cursor.get();
                key = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_NAME_FIELD_INDEX)).getStringValue();
                value = ((AString) field.getValueByPos(MetadataRecordTypes.PROPERTIES_VALUE_FIELD_INDEX)).getStringValue();
                properties.put(key, value);
            }
            // Timestamp
            Date timestamp = new Date((((ADateTime) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_LAST_REFRESH_TIME_FIELD_INDEX))).getChrononTime());
            // State
            TransactionState state = TransactionState.values()[((AInt32) datasetDetailsRecord.getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_TRANSACTION_STATE_FIELD_INDEX)).getIntegerValue()];
            datasetDetails = new ExternalDatasetDetails(adapter, properties, timestamp, state);
    }
    Map<String, String> hints = getDatasetHints(datasetRecord);
    String metaTypeDataverseName = null;
    String metaTypeName = null;
    int metaTypeDataverseNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METADATA_DATAVERSE);
    if (metaTypeDataverseNameIndex >= 0) {
        metaTypeDataverseName = ((AString) datasetRecord.getValueByPos(metaTypeDataverseNameIndex)).getStringValue();
        int metaTypeNameIndex = datasetRecord.getType().getFieldIndex(MetadataRecordTypes.FIELD_NAME_METATYPE_NAME);
        metaTypeName = ((AString) datasetRecord.getValueByPos(metaTypeNameIndex)).getStringValue();
    }
    // Read the rebalance count if there is one.
    int rebalanceCountIndex = datasetRecord.getType().getFieldIndex(REBALANCE_ID_FIELD_NAME);
    long rebalanceCount = rebalanceCountIndex >= 0 ? ((AInt64) datasetRecord.getValueByPos(rebalanceCountIndex)).getLongValue() : 0;
    return new Dataset(dataverseName, datasetName, typeDataverseName, typeName, metaTypeDataverseName, metaTypeName, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails, hints, datasetType, datasetId, pendingOp, rebalanceCount);
}
Also used : TransactionState(org.apache.asterix.common.config.DatasetConfig.TransactionState) FileStructure(org.apache.asterix.metadata.entities.InternalDatasetDetails.FileStructure) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Dataset(org.apache.asterix.metadata.entities.Dataset) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) ArrayList(java.util.ArrayList) ADateTime(org.apache.asterix.om.base.ADateTime) DatasetType(org.apache.asterix.common.config.DatasetConfig.DatasetType) AMutableString(org.apache.asterix.om.base.AMutableString) AString(org.apache.asterix.om.base.AString) IACursor(org.apache.asterix.om.base.IACursor) IDatasetDetails(org.apache.asterix.metadata.IDatasetDetails) AInt32(org.apache.asterix.om.base.AInt32) Date(java.util.Date) LinkedHashMap(java.util.LinkedHashMap) ARecord(org.apache.asterix.om.base.ARecord) AOrderedList(org.apache.asterix.om.base.AOrderedList) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) PartitioningStrategy(org.apache.asterix.metadata.entities.InternalDatasetDetails.PartitioningStrategy) List(java.util.List) AOrderedList(org.apache.asterix.om.base.AOrderedList) AUnorderedList(org.apache.asterix.om.base.AUnorderedList) ArrayList(java.util.ArrayList) AString(org.apache.asterix.om.base.AString)
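
The translator above only reads an ExternalDatasetDetails back out of a metadata record. For orientation, here is a minimal sketch of the object it reconstructs, using only the four-argument constructor and the TransactionState value that appear in this example and in Example 9 below; the adapter name and properties are illustrative placeholders, not values taken from the AsterixDB sources, and the classes used are those listed in the "Also used" line above.

Map<String, String> externalProperties = new HashMap<>();
externalProperties.put("path", "hdfs://namenode:9000/sales");   // hypothetical adapter property
externalProperties.put("format", "delimited-text");             // hypothetical adapter property
// Same constructor used in createDatasetFromARecord and handleCreateDatasetStatement:
// adapter name, adapter properties, last refresh timestamp, and transaction state.
IDatasetDetails details =
        new ExternalDatasetDetails("hdfs", externalProperties, new Date(), TransactionState.COMMIT);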

Example 7 with ExternalDatasetDetails

use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.

the class DatasetDataSource method buildDatasourceScanRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed, List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig) throws AlgebricksException {
    switch(dataset.getDatasetType()) {
        case EXTERNAL:
            Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset();
            String itemTypeName = externalDataset.getItemTypeName();
            IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
            ExternalDatasetDetails edd = (ExternalDatasetDetails) externalDataset.getDatasetDetails();
            IAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset, edd.getAdapter(), edd.getProperties(), (ARecordType) itemType, null);
            return metadataProvider.buildExternalDatasetDataScannerRuntime(jobSpec, itemType, adapterFactory, NonTaggedDataFormat.INSTANCE);
        case INTERNAL:
            DataSourceId id = getId();
            String dataverseName = id.getDataverseName();
            String datasetName = id.getDatasourceName();
            Index primaryIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, datasetName);
            int[] minFilterFieldIndexes = createFilterIndexes(minFilterVars, opSchema);
            int[] maxFilterFieldIndexes = createFilterIndexes(maxFilterVars, opSchema);
            return metadataProvider.buildBtreeRuntime(jobSpec, opSchema, typeEnv, context, true, false, ((DatasetDataSource) dataSource).getDataset(), primaryIndex.getIndexName(), null, null, true, true, minFilterFieldIndexes, maxFilterFieldIndexes);
        default:
            throw new AlgebricksException("Unknown datasource type");
    }
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IAdapterFactory(org.apache.asterix.external.api.IAdapterFactory) Index(org.apache.asterix.metadata.entities.Index) IAType(org.apache.asterix.om.types.IAType)
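
The EXTERNAL branch above relies on getDatasetDetails() returning the IDatasetDetails interface, so the cast is only safe after checking the dataset type. A minimal sketch of that guard-and-cast pattern follows, assuming dataset is a Dataset obtained from the metadata; it is not code from the AsterixDB sources.

if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
    ExternalDatasetDetails edd = (ExternalDatasetDetails) dataset.getDatasetDetails();
    String adapterName = edd.getAdapter();                 // resolved to an adapter factory by the caller
    Map<String, String> adapterConfig = edd.getProperties();
    // adapterName and adapterConfig are exactly what buildDatasourceScanRuntime above
    // hands to metadataProvider.getConfiguredAdapterFactory(...).
}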

Example 8 with ExternalDatasetDetails

use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.

the class QueryTranslator method handleCreateIndexStatement.

protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
    ProgressState progress = ProgressState.NO_PROGRESS;
    CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
    String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
    String datasetName = stmtCreateIndex.getDatasetName().getValue();
    List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
    String indexName = null;
    JobSpecification spec = null;
    Dataset ds = null;
    // For external datasets
    List<ExternalFile> externalFilesSnapshot = null;
    boolean firstExternalDatasetIndex = false;
    boolean filesIndexReplicated = false;
    Index filesIndex = null;
    boolean datasetLocked = false;
    Index index = null;
    try {
        ds = metadataProvider.findDataset(dataverseName, datasetName);
        if (ds == null) {
            throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
        }
        indexName = stmtCreateIndex.getIndexName().getValue();
        index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
        ARecordType aRecordType = (ARecordType) dt.getDatatype();
        ARecordType metaRecordType = null;
        if (ds.hasMetaPart()) {
            Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
            metaRecordType = (ARecordType) metaDt.getDatatype();
        }
        List<List<String>> indexFields = new ArrayList<>();
        List<IAType> indexFieldTypes = new ArrayList<>();
        int keyIndex = 0;
        for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
            IAType fieldType = null;
            ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
            boolean isOpen = subType.isOpen();
            int i = 0;
            if (fieldExpr.first.size() > 1 && !isOpen) {
                while (i < fieldExpr.first.size() - 1 && !isOpen) {
                    subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
                    i++;
                    isOpen = subType.isOpen();
                }
            }
            if (fieldExpr.second == null) {
                fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
            } else {
                if (!stmtCreateIndex.isEnforced()) {
                    throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
                }
                if (!isOpen) {
                    throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
                }
                if (stmtCreateIndex.hasMetaField()) {
                    throw new AlgebricksException("Typed open index can only be created on the record part");
                }
                Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
                TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
                fieldType = typeMap.get(typeSignature);
            }
            if (fieldType == null) {
                throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
            }
            indexFields.add(fieldExpr.first);
            indexFieldTypes.add(fieldType);
            ++keyIndex;
        }
        ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
        if (index != null) {
            if (stmtCreateIndex.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("An index with this name " + indexName + " already exists.");
            }
        }
        // If this is a keyword or ngram inverted index and the dataset has a variable-length primary key, report an error message and stop.
        if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
            List<List<String>> partitioningKeys = ds.getPrimaryKeys();
            for (List<String> partitioningKey : partitioningKeys) {
                IAType keyType = aRecordType.getSubFieldType(partitioningKey);
                ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
                // If it is not a fixed length
                if (typeTrait.getFixedLength() < 0) {
                    throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
                }
            }
        }
        if (ds.getDatasetType() == DatasetType.INTERNAL) {
            validateIfResourceIsActiveInFeed(ds);
        } else {
            // Check if the dataset is indexible
            if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
                throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
            }
            // Check if the name of the index is valid
            if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
                throw new AlgebricksException("external dataset index name is invalid");
            }
            // Check if the files index exist
            filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
            firstExternalDatasetIndex = filesIndex == null;
            // Lock external dataset
            ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
            datasetLocked = true;
            if (firstExternalDatasetIndex) {
                // Verify that no one has created an index before we acquire the lock
                filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                if (filesIndex != null) {
                    ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
                    firstExternalDatasetIndex = false;
                    ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
                }
            }
            if (firstExternalDatasetIndex) {
                // Get snapshot from External File System
                externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
                // Add an entry for the files index
                filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
                MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
                // Add files to the external files index
                for (ExternalFile file : externalFilesSnapshot) {
                    MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
                }
                // This is the first index for the external dataset, replicate the files index
                spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
                if (spec == null) {
                    throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
                }
                filesIndexReplicated = true;
                JobUtils.runJob(hcc, spec, true);
            }
        }
        // check whether there exists another enforced index on the same field
        if (stmtCreateIndex.isEnforced()) {
            List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
            for (Index existingIndex : indexes) {
                if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
                    throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
                }
            }
        }
        // #. add a new index with PendingAddOp
        index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. prepare to create the index artifact in NC.
        spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
        if (spec == null) {
            throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
        // #. create the index artifact in NC.
        JobUtils.runJob(hcc, spec, true);
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. load data into the index in NC.
        spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        JobUtils.runJob(hcc, spec, true);
        // #. begin new metadataTxn
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. add another new index with PendingNoOp after deleting the index with PendingAddOp
        MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        index.setPendingOp(MetadataUtil.PENDING_NO_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. do the same for the files index: add it with PendingNoOp after deleting the one with PendingAddOp
        if (firstExternalDatasetIndex) {
            MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
            filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
            MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
            // update transaction timestamp
            ((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        // If files index was replicated for external dataset, it should be cleaned up on NC side
        if (filesIndexReplicated) {
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            try {
                JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
        }
        if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
            // #. execute compensation operations
            // remove the index in NC
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
            if (firstExternalDatasetIndex) {
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop External Files from metadata
                    MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
                }
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop the files index from metadata
                    MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
                }
            }
            // remove the record from the metadata.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                abort(e, e2, mdTxnCtx);
                throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
            }
        }
        throw e;
    } finally {
        metadataProvider.getLocks().unlock();
        if (datasetLocked) {
            ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
        }
    }
}
Also used : ProgressState(org.apache.asterix.common.utils.JobUtils.ProgressState) ArrayList(java.util.ArrayList) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Index(org.apache.asterix.metadata.entities.Index) Datatype(org.apache.asterix.metadata.entities.Datatype) TypeSignature(org.apache.asterix.om.types.TypeSignature) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) ArrayList(java.util.ArrayList) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) CompilationException(org.apache.asterix.common.exceptions.CompilationException) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) TypeExpression(org.apache.asterix.lang.common.expression.TypeExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CreateIndexStatement(org.apache.asterix.lang.common.statement.CreateIndexStatement) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) DatasetNodegroupCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetNodegroupCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)
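
Because the try block above is long, here is a condensed sketch (not from the sources) of just the external-dataset guards it applies before building a secondary index; ds, dataverseName, datasetName, indexName, and metadataProvider are assumed to be in scope as in the method above.

ExternalDatasetDetails edd = (ExternalDatasetDetails) ds.getDatasetDetails();
if (!ExternalIndexingOperations.isIndexible(edd)) {
    throw new AlgebricksException("dataset using " + edd.getAdapter() + " Adapter can't be indexed");
}
if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
    throw new AlgebricksException("external dataset index name is invalid");
}
// The files index is created lazily the first time any secondary index is built on the dataset.
Index filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(),
        dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
boolean firstExternalDatasetIndex = (filesIndex == null);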

Example 9 with ExternalDatasetDetails

use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.

the class QueryTranslator method handleCreateDatasetStatement.

public void handleCreateDatasetStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws CompilationException, Exception {
    MutableObject<ProgressState> progress = new MutableObject<>(ProgressState.NO_PROGRESS);
    DatasetDecl dd = (DatasetDecl) stmt;
    String dataverseName = getActiveDataverse(dd.getDataverse());
    String datasetName = dd.getName().getValue();
    DatasetType dsType = dd.getDatasetType();
    String itemTypeDataverseName = getActiveDataverse(dd.getItemTypeDataverse());
    String itemTypeName = dd.getItemTypeName().getValue();
    String metaItemTypeDataverseName = getActiveDataverse(dd.getMetaItemTypeDataverse());
    String metaItemTypeName = dd.getMetaItemTypeName().getValue();
    Identifier ngNameId = dd.getNodegroupName();
    String nodegroupName = ngNameId == null ? null : ngNameId.getValue();
    String compactionPolicy = dd.getCompactionPolicy();
    Map<String, String> compactionPolicyProperties = dd.getCompactionPolicyProperties();
    boolean defaultCompactionPolicy = compactionPolicy == null;
    boolean temp = dd.getDatasetDetailsDecl().isTemp();
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    MetadataLockManager.INSTANCE.createDatasetBegin(metadataProvider.getLocks(), dataverseName, itemTypeDataverseName, itemTypeDataverseName + "." + itemTypeName, metaItemTypeDataverseName, metaItemTypeDataverseName + "." + metaItemTypeName, nodegroupName, compactionPolicy, dataverseName + "." + datasetName, defaultCompactionPolicy);
    Dataset dataset = null;
    try {
        IDatasetDetails datasetDetails = null;
        Dataset ds = metadataProvider.findDataset(dataverseName, datasetName);
        if (ds != null) {
            if (dd.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("A dataset with this name " + datasetName + " already exists.");
            }
        }
        Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), itemTypeDataverseName, itemTypeName);
        if (dt == null) {
            throw new AlgebricksException(": type " + itemTypeName + " could not be found.");
        }
        String ngName = ngNameId != null ? ngNameId.getValue() : configureNodegroupForDataset(appCtx, dd.getHints(), dataverseName, datasetName, metadataProvider);
        if (compactionPolicy == null) {
            compactionPolicy = GlobalConfig.DEFAULT_COMPACTION_POLICY_NAME;
            compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
        } else {
            validateCompactionPolicy(compactionPolicy, compactionPolicyProperties, mdTxnCtx, false);
        }
        switch(dd.getDatasetType()) {
            case INTERNAL:
                IAType itemType = dt.getDatatype();
                if (itemType.getTypeTag() != ATypeTag.OBJECT) {
                    throw new AlgebricksException("Dataset type has to be a record type.");
                }
                IAType metaItemType = null;
                if (metaItemTypeDataverseName != null && metaItemTypeName != null) {
                    metaItemType = metadataProvider.findType(metaItemTypeDataverseName, metaItemTypeName);
                }
                if (metaItemType != null && metaItemType.getTypeTag() != ATypeTag.OBJECT) {
                    throw new AlgebricksException("Dataset meta type has to be a record type.");
                }
                ARecordType metaRecType = (ARecordType) metaItemType;
                List<List<String>> partitioningExprs = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getPartitioningExprs();
                List<Integer> keySourceIndicators = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getKeySourceIndicators();
                boolean autogenerated = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).isAutogenerated();
                ARecordType aRecordType = (ARecordType) itemType;
                List<IAType> partitioningTypes = ValidateUtil.validatePartitioningExpressions(aRecordType, metaRecType, partitioningExprs, keySourceIndicators, autogenerated);
                List<String> filterField = ((InternalDetailsDecl) dd.getDatasetDetailsDecl()).getFilterField();
                if (filterField != null) {
                    ValidateUtil.validateFilterField(aRecordType, filterField);
                }
                if (compactionPolicy == null && filterField != null) {
                    // If the dataset has a filter and the user didn't specify a merge policy,
                    // then we pick the correlated-prefix policy as the default merge policy.
                    compactionPolicy = GlobalConfig.DEFAULT_FILTERED_DATASET_COMPACTION_POLICY_NAME;
                    compactionPolicyProperties = GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES;
                }
                datasetDetails = new InternalDatasetDetails(InternalDatasetDetails.FileStructure.BTREE, InternalDatasetDetails.PartitioningStrategy.HASH, partitioningExprs, partitioningExprs, keySourceIndicators, partitioningTypes, autogenerated, filterField, temp);
                break;
            case EXTERNAL:
                String adapter = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getAdapter();
                Map<String, String> properties = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getProperties();
                datasetDetails = new ExternalDatasetDetails(adapter, properties, new Date(), TransactionState.COMMIT);
                break;
            default:
                throw new CompilationException("Unknown datatype " + dd.getDatasetType());
        }
        // #. initialize DatasetIdFactory if it is not initialized.
        if (!DatasetIdFactory.isInitialized()) {
            DatasetIdFactory.initialize(MetadataManager.INSTANCE.getMostRecentDatasetId());
        }
        // #. add a new dataset with PendingAddOp
        dataset = new Dataset(dataverseName, datasetName, itemTypeDataverseName, itemTypeName, metaItemTypeDataverseName, metaItemTypeName, ngName, compactionPolicy, compactionPolicyProperties, datasetDetails, dd.getHints(), dsType, DatasetIdFactory.generateDatasetId(), MetadataUtil.PENDING_ADD_OP);
        MetadataManager.INSTANCE.addDataset(metadataProvider.getMetadataTxnContext(), dataset);
        if (dd.getDatasetType() == DatasetType.INTERNAL) {
            JobSpecification jobSpec = DatasetUtil.createDatasetJobSpec(dataset, metadataProvider);
            // #. make metadataTxn commit before calling runJob.
            MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            bActiveTxn = false;
            progress.setValue(ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA);
            // #. runJob
            JobUtils.runJob(hcc, jobSpec, true);
            // #. begin new metadataTxn
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
        }
        // #. add a new dataset with PendingNoOp after deleting the dataset with PendingAddOp
        MetadataManager.INSTANCE.dropDataset(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
        dataset.setPendingOp(MetadataUtil.PENDING_NO_OP);
        MetadataManager.INSTANCE.addDataset(metadataProvider.getMetadataTxnContext(), dataset);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        if (progress.getValue() == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
            // #. execute compensation operations
            // remove the index in NC
            // [Notice]
            // As long as we updated (and committed) the metadata, we should undo any effect of the job,
            // because an exception occurred during runJob.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                JobSpecification jobSpec = DatasetUtil.dropDatasetJobSpec(dataset, metadataProvider);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
            // remove the record from the metadata.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                MetadataManager.INSTANCE.dropDataset(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                abort(e, e2, mdTxnCtx);
                throw new IllegalStateException("System is inconsistent state: pending dataset(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
            }
        }
        throw e;
    } finally {
        metadataProvider.getLocks().unlock();
    }
}
Also used : ProgressState(org.apache.asterix.common.utils.JobUtils.ProgressState) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) ExternalDetailsDecl(org.apache.asterix.lang.common.statement.ExternalDetailsDecl) DatasetType(org.apache.asterix.common.config.DatasetConfig.DatasetType) IDatasetDetails(org.apache.asterix.metadata.IDatasetDetails) Datatype(org.apache.asterix.metadata.entities.Datatype) DatasetDecl(org.apache.asterix.lang.common.statement.DatasetDecl) Identifier(org.apache.asterix.lang.common.struct.Identifier) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) ArrayList(java.util.ArrayList) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) MutableObject(org.apache.commons.lang3.mutable.MutableObject) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) InternalDatasetDetails(org.apache.asterix.metadata.entities.InternalDatasetDetails) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) InternalDetailsDecl(org.apache.asterix.lang.common.statement.InternalDetailsDecl) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)

Example 10 with ExternalDatasetDetails

use of org.apache.asterix.metadata.entities.ExternalDatasetDetails in project asterixdb by apache.

the class ExternalIndexingOperations method getIndexingOperator.

/**
     * Creates an indexing operator that indexes records in HDFS.
     *
     * @param metadataProvider
     * @param jobSpec
     * @param itemType
     * @param dataset
     * @param files
     * @param indexerDesc
     * @return the indexing operator descriptor paired with its partition constraint
     * @throws AlgebricksException
     * @throws HyracksDataException
     */
private static Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> getIndexingOperator(MetadataProvider metadataProvider, JobSpecification jobSpec, IAType itemType, Dataset dataset, List<ExternalFile> files, RecordDescriptor indexerDesc) throws HyracksDataException, AlgebricksException {
    ExternalDatasetDetails externalDatasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    Map<String, String> configuration = externalDatasetDetails.getProperties();
    IAdapterFactory adapterFactory = AdapterFactoryProvider.getIndexingAdapterFactory(metadataProvider.getApplicationContext().getServiceContext(), externalDatasetDetails.getAdapter(), configuration, (ARecordType) itemType, files, true, null);
    return new Pair<>(new ExternalScanOperatorDescriptor(jobSpec, indexerDesc, adapterFactory), adapterFactory.getPartitionConstraint());
}
Also used : ExternalScanOperatorDescriptor(org.apache.asterix.external.operators.ExternalScanOperatorDescriptor) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) IAdapterFactory(org.apache.asterix.external.api.IAdapterFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

ExternalDatasetDetails (org.apache.asterix.metadata.entities.ExternalDatasetDetails): 10
Dataset (org.apache.asterix.metadata.entities.Dataset): 7
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 6
IOException (java.io.IOException): 5
Index (org.apache.asterix.metadata.entities.Index): 5
ArrayList (java.util.ArrayList): 4
Date (java.util.Date): 4
List (java.util.List): 4
AsterixException (org.apache.asterix.common.exceptions.AsterixException): 4
CompilationException (org.apache.asterix.common.exceptions.CompilationException): 4
ExternalFile (org.apache.asterix.external.indexing.ExternalFile): 4
MetadataException (org.apache.asterix.metadata.MetadataException): 4
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 4
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 4
RemoteException (java.rmi.RemoteException): 3
TransactionState (org.apache.asterix.common.config.DatasetConfig.TransactionState): 3
ACIDException (org.apache.asterix.common.exceptions.ACIDException): 3
IDataset (org.apache.asterix.common.metadata.IDataset): 3
MetadataTransactionContext (org.apache.asterix.metadata.MetadataTransactionContext): 3
ARecordType (org.apache.asterix.om.types.ARecordType): 3