Search in sources :

Example 6 with ExternalFile

use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.

the class MetadataNode method getExternalFiles.

/**
 * Collects the external files recorded in the metadata for one dataset.
 * <p>
 * The lookup searches {@code MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET} with a key built from
 * the dataset's dataverse name and dataset name; every matching entry is translated into an
 * {@link ExternalFile} and appended to the returned list.
 *
 * @param jobId   the job id under which the index search is performed
 * @param dataset the dataset whose external files are requested
 * @return the external files found for the dataset; empty when the search yields nothing
 * @throws MetadataException if the lookup fails; a {@code HyracksDataException} raised while
 *                           building the key or searching is wrapped into this type
 * @throws RemoteException   declared by the overridden method
 */
@Override
public List<ExternalFile> getExternalFiles(JobId jobId, Dataset dataset) throws MetadataException, RemoteException {
    final List<ExternalFile> externalFiles = new ArrayList<>();
    try {
        // Key of the search: (dataverse name, dataset name).
        final ITupleReference datasetKey = createTuple(dataset.getDataverseName(), dataset.getDatasetName());
        final ExternalFileTupleTranslator translator = tupleTranslatorProvider.getExternalFileTupleTranslator(false);
        final IValueExtractor<ExternalFile> extractor = new MetadataEntityValueExtractor<>(translator);
        searchIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, datasetKey, extractor, externalFiles);
    } catch (HyracksDataException e) {
        throw new MetadataException(e);
    }
    return externalFiles;
}
Also used : MetadataEntityValueExtractor(org.apache.asterix.metadata.valueextractors.MetadataEntityValueExtractor) ExternalFileTupleTranslator(org.apache.asterix.metadata.entitytupletranslators.ExternalFileTupleTranslator) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ArrayList(java.util.ArrayList) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 7 with ExternalFile

use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.

the class QueryTranslator method handleExternalDatasetRefreshStatement.

/**
 * Handles a refresh statement for an external dataset.
 * <p>
 * The method compares the external files recorded in the metadata with the external file system.
 * When nothing changed it only updates the dataset's refresh timestamp. Otherwise it proceeds in
 * phases, tracked by the local {@code transactionState}:
 * <ol>
 * <li>{@code COMMIT} (initial): a transaction copy of the dataset and the delta files (added,
 * appended, deleted) are written to the metadata and committed;</li>
 * <li>{@code BEGIN}: the files-index update job and one update job per non-files index are run;</li>
 * <li>{@code READY_TO_COMMIT}: the decision to commit is recorded in the metadata, the commit job
 * is run, and finally the delta entries are folded into the regular file entries and the dataset
 * state is set back to {@code COMMIT}.</li>
 * </ol>
 * On failure the handling depends on the phase reached: in {@code COMMIT} the exception is
 * rethrown, in {@code READY_TO_COMMIT} an {@link IllegalStateException} is thrown, and in
 * {@code BEGIN} an abort job is run and the delta files are dropped from the metadata.
 *
 * @param metadataProvider used to look up the dataset; carries the metadata transaction context
 *                         and the locks released at the end
 * @param stmt             the statement; cast to {@code RefreshExternalDatasetStatement}
 * @param hcc              the connection on which the generated jobs are run
 * @throws Exception an {@code AlgebricksException} when the dataset does not exist, is not
 *                   external, or has no index; an {@link IllegalStateException} when a failure
 *                   occurs after the ready-to-commit point or while cleaning up; otherwise the
 *                   original failure
 */
protected void handleExternalDatasetRefreshStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
    RefreshExternalDatasetStatement stmtRefresh = (RefreshExternalDatasetStatement) stmt;
    String dataverseName = getActiveDataverse(stmtRefresh.getDataverseName());
    String datasetName = stmtRefresh.getDatasetName().getValue();
    // Phase reached so far; consulted by the catch block to decide how to clean up.
    TransactionState transactionState = TransactionState.COMMIT;
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    // True while mdTxnCtx refers to a metadata transaction that has not been committed yet.
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    JobSpecification spec = null;
    Dataset ds = null;
    List<ExternalFile> metadataFiles = null;
    List<ExternalFile> deletedFiles = null;
    List<ExternalFile> addedFiles = null;
    List<ExternalFile> appendedFiles = null;
    List<Index> indexes = null;
    Dataset transactionDataset = null;
    // Set once ExternalDatasetsRegistry.refreshBegin has been called, so that the finally block
    // knows whether refreshEnd must be called.
    boolean lockAquired = false;
    // Passed to refreshEnd; only set to true after the final metadata commit.
    boolean success = false;
    MetadataLockManager.INSTANCE.refreshDatasetBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
    try {
        ds = metadataProvider.findDataset(dataverseName, datasetName);
        // Dataset exists ?
        if (ds == null) {
            throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
        }
        // Dataset external ?
        if (ds.getDatasetType() != DatasetType.EXTERNAL) {
            throw new AlgebricksException("dataset " + datasetName + " in dataverse " + dataverseName + " is not an external dataset");
        }
        // Dataset has indexes ?
        indexes = MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx, dataverseName, datasetName);
        if (indexes.isEmpty()) {
            throw new AlgebricksException("External dataset " + datasetName + " in dataverse " + dataverseName + " doesn't have any index");
        }
        // Record transaction time
        Date txnTime = new Date();
        // refresh lock here
        ExternalDatasetsRegistry.INSTANCE.refreshBegin(ds);
        lockAquired = true;
        // Get the files currently recorded in the metadata for this dataset
        metadataFiles = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, ds);
        deletedFiles = new ArrayList<>();
        addedFiles = new ArrayList<>();
        appendedFiles = new ArrayList<>();
        // Now we compare snapshot with external file system; the three delta lists are passed in
        // empty and are read afterwards, so they are presumably filled by this call.
        if (ExternalIndexingOperations.isDatasetUptodate(ds, metadataFiles, addedFiles, deletedFiles, appendedFiles)) {
            // Nothing changed: only the refresh timestamp is updated.
            ((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(txnTime);
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
            MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            // latch will be released in the finally clause
            // NOTE(review): success is still false here, so refreshEnd(ds, false) is called for an
            // up-to-date dataset -- confirm this is intended.
            return;
        }
        // At this point, we know data has changed in the external file system, record
        // transaction in metadata and start
        transactionDataset = ExternalIndexingOperations.createTransactionDataset(ds);
        // Remove old dataset record and replace it with a new one
        MetadataManager.INSTANCE.updateDataset(mdTxnCtx, transactionDataset);
        // Add delta files to the metadata (deleted files are recorded as entries too)
        for (ExternalFile file : addedFiles) {
            MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
        }
        for (ExternalFile file : appendedFiles) {
            MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
        }
        for (ExternalFile file : deletedFiles) {
            MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
        }
        // Create the files index update job
        spec = ExternalIndexingOperations.buildFilesIndexUpdateOp(ds, metadataFiles, addedFiles, appendedFiles, metadataProvider);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        // From here on a failure requires the abort job and removal of the delta files.
        transactionState = TransactionState.BEGIN;
        // run the files update job
        JobUtils.runJob(hcc, spec, true);
        for (Index index : indexes) {
            // The files index was updated by the job above; update every other index.
            if (!ExternalIndexingOperations.isFileIndex(index)) {
                spec = ExternalIndexingOperations.buildIndexUpdateOp(ds, index, metadataFiles, addedFiles, appendedFiles, metadataProvider);
                // run the index update job
                JobUtils.runJob(hcc, spec, true);
            }
        }
        // all index updates have completed successfully, record transaction state
        spec = ExternalIndexingOperations.buildCommitJob(ds, indexes, metadataProvider);
        // Acquire write latch again -> start a transaction and record the decision to commit
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        bActiveTxn = true;
        ((ExternalDatasetDetails) transactionDataset.getDatasetDetails()).setState(TransactionState.READY_TO_COMMIT);
        ((ExternalDatasetDetails) transactionDataset.getDatasetDetails()).setRefreshTimestamp(txnTime);
        MetadataManager.INSTANCE.updateDataset(mdTxnCtx, transactionDataset);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        // From here on a failure is reported as an inconsistent state (see the catch block).
        transactionState = TransactionState.READY_TO_COMMIT;
        // We don't release the latch since this job is expected to be quick
        JobUtils.runJob(hcc, spec, true);
        // Start a new metadata transaction to record the final state of the transaction
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        bActiveTxn = true;
        for (ExternalFile file : metadataFiles) {
            if (file.getPendingOp() == ExternalFilePendingOp.DROP_OP) {
                // The previously recorded file is marked for removal: drop its entry.
                MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
            } else if (file.getPendingOp() == ExternalFilePendingOp.NO_OP) {
                // Look for an appended delta with the same file name and merge it into this entry.
                Iterator<ExternalFile> iterator = appendedFiles.iterator();
                while (iterator.hasNext()) {
                    ExternalFile appendedFile = iterator.next();
                    if (file.getFileName().equals(appendedFile.getFileName())) {
                        // delete existing file
                        MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                        // delete existing appended file
                        MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, appendedFile);
                        // add the original file with appended information
                        appendedFile.setFileNumber(file.getFileNumber());
                        appendedFile.setPendingOp(ExternalFilePendingOp.NO_OP);
                        MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, appendedFile);
                        // Merged deltas are removed from the list so they are not matched again.
                        iterator.remove();
                    }
                }
            }
        }
        // remove the deleted files delta
        for (ExternalFile file : deletedFiles) {
            MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
        }
        // insert new files: replace each pending entry by one with no pending operation
        for (ExternalFile file : addedFiles) {
            MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
            file.setPendingOp(ExternalFilePendingOp.NO_OP);
            MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
        }
        // mark the transaction as complete
        ((ExternalDatasetDetails) transactionDataset.getDatasetDetails()).setState(TransactionState.COMMIT);
        MetadataManager.INSTANCE.updateDataset(mdTxnCtx, transactionDataset);
        // commit metadata transaction
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        success = true;
    } catch (Exception e) {
        // An uncommitted metadata transaction is handed to the abort helper first.
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        if (transactionState == TransactionState.READY_TO_COMMIT) {
            // The decision to commit was already recorded, so the failure cannot be undone here.
            throw new IllegalStateException("System is inconsistent state: commit of (" + dataverseName + "." + datasetName + ") refresh couldn't carry out the commit phase", e);
        }
        if (transactionState == TransactionState.COMMIT) {
            // Nothing to do , everything should be clean
            throw e;
        }
        if (transactionState == TransactionState.BEGIN) {
            // transaction failed, need to do the following
            // clean NCs removing transaction components
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            spec = ExternalIndexingOperations.buildAbortOp(ds, indexes, metadataProvider);
            MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            bActiveTxn = false;
            try {
                JobUtils.runJob(hcc, spec, true);
            } catch (Exception e2) {
                // This should never happen -- report an inconsistent state
                e.addSuppressed(e2);
                throw new IllegalStateException("System is in inconsistent state. Failed to abort refresh", e);
            }
            // return the state of the dataset to committed: drop all delta entries and restore
            // the original dataset record
            try {
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                for (ExternalFile file : deletedFiles) {
                    MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                }
                for (ExternalFile file : addedFiles) {
                    MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                }
                for (ExternalFile file : appendedFiles) {
                    MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                }
                MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
                // commit metadata transaction
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                abort(e, e2, mdTxnCtx);
                e.addSuppressed(e2);
                throw new IllegalStateException("System is in inconsistent state. Failed to drop delta files", e);
            }
            // NOTE(review): when the cleanup above succeeds, the original exception e is not
            // rethrown, so the method returns normally although the refresh failed -- confirm
            // whether callers are expected to be informed of the failure.
        }
    } finally {
        if (lockAquired) {
            ExternalDatasetsRegistry.INSTANCE.refreshEnd(ds, success);
        }
        metadataProvider.getLocks().unlock();
    }
}
Also used : TransactionState(org.apache.asterix.common.config.DatasetConfig.TransactionState) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Index(org.apache.asterix.metadata.entities.Index) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) RefreshExternalDatasetStatement(org.apache.asterix.lang.common.statement.RefreshExternalDatasetStatement) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) Iterator(java.util.Iterator) JobSpecification(org.apache.hyracks.api.job.JobSpecification)

Example 8 with ExternalFile

use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.

the class GlobalRecoveryManager method recoverDataset.

/**
 * Recovers the external datasets of one dataverse by rolling an interrupted refresh forward or
 * backward, depending on the transaction state stored with each dataset.
 * <p>
 * The metadata dataverse is skipped. For every external dataset of any other dataverse:
 * <ul>
 * <li>state {@code READY_TO_COMMIT}: the refresh is rolled forward -- the recover job is run and
 * the pending file entries (add / drop / append) are folded into the regular entries;</li>
 * <li>any other state, if the dataset has indexes: pending file entries are dropped when the
 * state is {@code BEGIN}, and the abort job is run.</li>
 * </ul>
 * In both cases the dataset state is then set to {@code COMMIT}, the metadata transaction is
 * committed exactly once for that dataset, and a new transaction is begun.
 *
 * @param appCtx    application context used to create the {@code MetadataProvider}
 * @param mdTxnCtx  the metadata transaction that is open on entry
 * @param dataverse the dataverse whose external datasets are recovered
 * @return the metadata transaction that is open on return; this is a new transaction whenever at
 *         least one dataset was recovered, otherwise the one passed in
 * @throws Exception if a metadata operation or a recovery job fails
 */
private MetadataTransactionContext recoverDataset(ICcApplicationContext appCtx, MetadataTransactionContext mdTxnCtx, Dataverse dataverse) throws Exception {
    if (dataverse.getDataverseName().equals(MetadataConstants.METADATA_DATAVERSE_NAME)) {
        return mdTxnCtx;
    }
    MetadataProvider metadataProvider = new MetadataProvider(appCtx, dataverse, componentProvider);
    try {
        List<Dataset> datasets = MetadataManager.INSTANCE.getDataverseDatasets(mdTxnCtx, dataverse.getDataverseName());
        for (Dataset dataset : datasets) {
            if (dataset.getDatasetType() != DatasetType.EXTERNAL) {
                continue;
            }
            // External dataset
            // Get indexes
            List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName());
            // Get the state of the dataset
            ExternalDatasetDetails dsd = (ExternalDatasetDetails) dataset.getDatasetDetails();
            TransactionState datasetState = dsd.getState();
            // READY_TO_COMMIT is tested first: previously this branch hung off the "has indexes"
            // check as its else-branch, so an indexed dataset whose refresh had already reached
            // the commit decision was aborted instead of rolled forward.
            if (datasetState == TransactionState.READY_TO_COMMIT) {
                List<ExternalFile> files = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, dataset);
                // if ready to commit, roll forward
                // 1. commit indexes in NCs
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                JobSpecification jobSpec = ExternalIndexingOperations.buildRecoverOp(dataset, indexes, metadataProvider);
                executeHyracksJob(jobSpec);
                // 2. add pending files in metadata
                for (ExternalFile file : files) {
                    if (file.getPendingOp() == ExternalFilePendingOp.ADD_OP) {
                        // Replace the pending entry by one without a pending operation.
                        MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                        file.setPendingOp(ExternalFilePendingOp.NO_OP);
                        MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
                    } else if (file.getPendingOp() == ExternalFilePendingOp.DROP_OP) {
                        // find original file: the entry with the same name and no pending
                        // operation (the pending entry itself carries the same name and must not
                        // be taken for its own original)
                        for (ExternalFile originalFile : files) {
                            if (originalFile.getPendingOp() == ExternalFilePendingOp.NO_OP && originalFile.getFileName().equals(file.getFileName())) {
                                MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                                MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, originalFile);
                                break;
                            }
                        }
                    } else if (file.getPendingOp() == ExternalFilePendingOp.APPEND_OP) {
                        // find original file (same rule as above) and give it the appended size
                        for (ExternalFile originalFile : files) {
                            if (originalFile.getPendingOp() == ExternalFilePendingOp.NO_OP && originalFile.getFileName().equals(file.getFileName())) {
                                MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                                MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, originalFile);
                                originalFile.setSize(file.getSize());
                                MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, originalFile);
                                break;
                            }
                        }
                    }
                }
            } else if (!indexes.isEmpty()) {
                if (datasetState == TransactionState.BEGIN) {
                    List<ExternalFile> files = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, dataset);
                    // 1. delete all pending files
                    for (ExternalFile file : files) {
                        if (file.getPendingOp() != ExternalFilePendingOp.NO_OP) {
                            MetadataManager.INSTANCE.dropExternalFile(mdTxnCtx, file);
                        }
                    }
                }
                // 2. clean artifacts in NCs
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                JobSpecification jobSpec = ExternalIndexingOperations.buildAbortOp(dataset, indexes, metadataProvider);
                executeHyracksJob(jobSpec);
            } else {
                // No indexes and no commit decision recorded: nothing to recover.
                continue;
            }
            // 3. correct the dataset state -- done once per dataset, after all of its files have
            // been handled, so that the file changes and the state change commit together.
            ((ExternalDatasetDetails) dataset.getDatasetDetails()).setState(TransactionState.COMMIT);
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, dataset);
            MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        }
    } finally {
        metadataProvider.getLocks().unlock();
    }
    return mdTxnCtx;
}
Also used : TransactionState(org.apache.asterix.common.config.DatasetConfig.TransactionState) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) Dataset(org.apache.asterix.metadata.entities.Dataset) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) Index(org.apache.asterix.metadata.entities.Index) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) ExternalFile(org.apache.asterix.external.indexing.ExternalFile)

Example 9 with ExternalFile

use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.

the class MetadataProvider method getConfiguredAdapterFactory.

/**
 * Builds the adapter factory for an external dataset.
 * <p>
 * The dataset's dataverse name is first stored in {@code configuration} under
 * {@code ExternalDataConstants.KEY_DATAVERSE}; the factory is then obtained from
 * {@code AdapterFactoryProvider}. If the dataset has a files index with pending operation
 * {@code 0}, the dataset's external files are additionally fetched from the metadata and the
 * entries with a pending operation are removed from that local list.
 *
 * @param dataset       the dataset the adapter is created for
 * @param adapterName   name of the adapter
 * @param configuration adapter configuration; modified by this method
 * @param itemType      record type passed on to the factory provider
 * @param metaType      meta record type passed on to the factory provider
 * @return the configured adapter factory
 * @throws AlgebricksException wrapping any failure, with message "Unable to create adapter"
 */
protected IAdapterFactory getConfiguredAdapterFactory(Dataset dataset, String adapterName, Map<String, String> configuration, ARecordType itemType, ARecordType metaType) throws AlgebricksException {
    try {
        configuration.put(ExternalDataConstants.KEY_DATAVERSE, dataset.getDataverseName());
        IAdapterFactory factory = AdapterFactoryProvider.getAdapterFactory(getApplicationContext().getServiceContext(), adapterName, configuration, itemType, metaType);
        // check to see if dataset is indexed
        String filesIndexName = dataset.getDatasetName().concat(IndexingConstants.EXTERNAL_FILE_INDEX_NAME_SUFFIX);
        Index filesIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), filesIndexName);
        if (filesIndex == null || filesIndex.getPendingOp() != 0) {
            return factory;
        }
        // Fetch the recorded files and keep only those without a pending operation.
        // NOTE(review): the filtered list is not used after this point in this method.
        List<ExternalFile> snapshot = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, dataset);
        for (Iterator<ExternalFile> cursor = snapshot.iterator(); cursor.hasNext();) {
            ExternalFile candidate = cursor.next();
            if (candidate.getPendingOp() != ExternalFilePendingOp.NO_OP) {
                cursor.remove();
            }
        }
        return factory;
    } catch (Exception e) {
        throw new AlgebricksException("Unable to create adapter", e);
    }
}
Also used : AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) IAdapterFactory(org.apache.asterix.external.api.IAdapterFactory) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) AsterixException(org.apache.asterix.common.exceptions.AsterixException)

Example 10 with ExternalFile

use of org.apache.asterix.external.indexing.ExternalFile in project asterixdb by apache.

the class QueryTranslator method handleCreateIndexStatement.

protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
    ProgressState progress = ProgressState.NO_PROGRESS;
    CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
    String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
    String datasetName = stmtCreateIndex.getDatasetName().getValue();
    List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
    String indexName = null;
    JobSpecification spec = null;
    Dataset ds = null;
    // For external datasets
    List<ExternalFile> externalFilesSnapshot = null;
    boolean firstExternalDatasetIndex = false;
    boolean filesIndexReplicated = false;
    Index filesIndex = null;
    boolean datasetLocked = false;
    Index index = null;
    try {
        ds = metadataProvider.findDataset(dataverseName, datasetName);
        if (ds == null) {
            throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
        }
        indexName = stmtCreateIndex.getIndexName().getValue();
        index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
        ARecordType aRecordType = (ARecordType) dt.getDatatype();
        ARecordType metaRecordType = null;
        if (ds.hasMetaPart()) {
            Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
            metaRecordType = (ARecordType) metaDt.getDatatype();
        }
        List<List<String>> indexFields = new ArrayList<>();
        List<IAType> indexFieldTypes = new ArrayList<>();
        int keyIndex = 0;
        for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
            IAType fieldType = null;
            ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
            boolean isOpen = subType.isOpen();
            int i = 0;
            if (fieldExpr.first.size() > 1 && !isOpen) {
                while (i < fieldExpr.first.size() - 1 && !isOpen) {
                    subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
                    i++;
                    isOpen = subType.isOpen();
                }
            }
            if (fieldExpr.second == null) {
                fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
            } else {
                if (!stmtCreateIndex.isEnforced()) {
                    throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
                }
                if (!isOpen) {
                    throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
                }
                if (stmtCreateIndex.hasMetaField()) {
                    throw new AlgebricksException("Typed open index can only be created on the record part");
                }
                Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
                TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
                fieldType = typeMap.get(typeSignature);
            }
            if (fieldType == null) {
                throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
            }
            indexFields.add(fieldExpr.first);
            indexFieldTypes.add(fieldType);
            ++keyIndex;
        }
        ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
        if (index != null) {
            if (stmtCreateIndex.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("An index with this name " + indexName + " already exists.");
            }
        }
        // error message and stop.
        if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
            List<List<String>> partitioningKeys = ds.getPrimaryKeys();
            for (List<String> partitioningKey : partitioningKeys) {
                IAType keyType = aRecordType.getSubFieldType(partitioningKey);
                ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
                // If it is not a fixed length
                if (typeTrait.getFixedLength() < 0) {
                    throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
                }
            }
        }
        if (ds.getDatasetType() == DatasetType.INTERNAL) {
            validateIfResourceIsActiveInFeed(ds);
        } else {
            // Check if the dataset is indexible
            if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
                throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
            }
            // Check if the name of the index is valid
            if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
                throw new AlgebricksException("external dataset index name is invalid");
            }
            // Check if the files index exist
            filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
            firstExternalDatasetIndex = filesIndex == null;
            // Lock external dataset
            ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
            datasetLocked = true;
            if (firstExternalDatasetIndex) {
                // Verify that no one has created an index before we acquire the lock
                filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                if (filesIndex != null) {
                    ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
                    firstExternalDatasetIndex = false;
                    ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
                }
            }
            if (firstExternalDatasetIndex) {
                // Get snapshot from External File System
                externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
                // Add an entry for the files index
                filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
                MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
                // Add files to the external files index
                for (ExternalFile file : externalFilesSnapshot) {
                    MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
                }
                // This is the first index for the external dataset, replicate the files index
                spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
                if (spec == null) {
                    throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
                }
                filesIndexReplicated = true;
                JobUtils.runJob(hcc, spec, true);
            }
        }
        // check whether there exists another enforced index on the same field
        if (stmtCreateIndex.isEnforced()) {
            List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
            for (Index existingIndex : indexes) {
                if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
                    throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
                }
            }
        }
        // #. add a new index with PendingAddOp
        index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. prepare to create the index artifact in NC.
        spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
        if (spec == null) {
            throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
        // #. create the index artifact in NC.
        JobUtils.runJob(hcc, spec, true);
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. load data into the index in NC.
        spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        JobUtils.runJob(hcc, spec, true);
        // #. begin new metadataTxn
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. add another new index with PendingNoOp after deleting the index with PendingAddOp
        MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        index.setPendingOp(MetadataUtil.PENDING_NO_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
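        // for the first index on an external dataset, likewise replace the files index's PendingAddOp record with a PendingNoOp one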
        if (firstExternalDatasetIndex) {
            MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
            filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
            MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
            // update transaction timestamp
            ((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        // If files index was replicated for external dataset, it should be cleaned up on NC side
        if (filesIndexReplicated) {
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            try {
                JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
        }
        if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
            // #. execute compensation operations
            // remove the index in NC
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
            if (firstExternalDatasetIndex) {
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop External Files from metadata
                    MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
                }
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop the files index from metadata
                    MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
                }
            }
            // remove the record from the metadata.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                abort(e, e2, mdTxnCtx);
                throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
            }
        }
        throw e;
    } finally {
        metadataProvider.getLocks().unlock();
        if (datasetLocked) {
            ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
        }
    }
}
Also used : ProgressState(org.apache.asterix.common.utils.JobUtils.ProgressState) ArrayList(java.util.ArrayList) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Index(org.apache.asterix.metadata.entities.Index) Datatype(org.apache.asterix.metadata.entities.Datatype) TypeSignature(org.apache.asterix.om.types.TypeSignature) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) ArrayList(java.util.ArrayList) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) CompilationException(org.apache.asterix.common.exceptions.CompilationException) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) TypeExpression(org.apache.asterix.lang.common.expression.TypeExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CreateIndexStatement(org.apache.asterix.lang.common.statement.CreateIndexStatement) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) DatasetNodegroupCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetNodegroupCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) 
ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)

Aggregations

ExternalFile (org.apache.asterix.external.indexing.ExternalFile) 17, ArrayList (java.util.ArrayList) 7, HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException) 7, Index (org.apache.asterix.metadata.entities.Index) 5, IOException (java.io.IOException) 4, Date (java.util.Date) 4, Dataset (org.apache.asterix.metadata.entities.Dataset) 4, ExternalDatasetDetails (org.apache.asterix.metadata.entities.ExternalDatasetDetails) 4, AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) 4, AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) 4, ACIDException (org.apache.asterix.common.exceptions.ACIDException) 3, AsterixException (org.apache.asterix.common.exceptions.AsterixException) 3, CompilationException (org.apache.asterix.common.exceptions.CompilationException) 3, MetadataException (org.apache.asterix.metadata.MetadataException) 3, FileStatus (org.apache.hadoop.fs.FileStatus) 3, Path (org.apache.hadoop.fs.Path) 3, JobSpecification (org.apache.hyracks.api.job.JobSpecification) 3, IIndex (org.apache.hyracks.storage.common.IIndex) 3, RemoteException (java.rmi.RemoteException) 2, List (java.util.List) 2