Examples with JobSpecification - org.apache.hyracks.api.job.JobSpecification

Example 91 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class TPCHCustomerOrderHashJoinTest method customerOrderCIDInMemoryHashLeftOuterJoin.

@Test
public void customerOrderCIDInMemoryHashLeftOuterJoin() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
    IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[ordersDesc.getFieldCount()];
    for (int j = 0; j < nonMatchWriterFactories.length; j++) {
        nonMatchWriterFactories[j] = NoopMissingWriterFactory.INSTANCE;
    }
    InMemoryHashJoinOperatorDescriptor join = new InMemoryHashJoinOperatorDescriptor(spec, new int[] { 0 }, new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, null, custOrderJoinDesc, true, nonMatchWriterFactories, 128, 128);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 1);
    IConnectorDescriptor custJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 0);
    IConnectorDescriptor joinPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) InMemoryHashJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 92 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class TPCHCustomerOrderNestedLoopJoinTest method customerOrderCIDOuterJoinMulti.

@Test
public void customerOrderCIDOuterJoinMulti() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] custSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer-part2.tbl") };
    IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
    RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
    IMissingWriterFactory[] nonMatchWriterFactories = new IMissingWriterFactory[ordersDesc.getFieldCount()];
    for (int j = 0; j < nonMatchWriterFactories.length; j++) {
        nonMatchWriterFactories[j] = NoopMissingWriterFactory.INSTANCE;
    }
    NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5, true, nonMatchWriterFactories);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(ordJoinConn, ordScanner, 0, join, 0);
    IConnectorDescriptor custJoinConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(custJoinConn, custScanner, 0, join, 1);
    IConnectorDescriptor joinPrinterConn = new MToNBroadcastConnectorDescriptor(spec);
    spec.connect(joinPrinterConn, join, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}

Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IMissingWriterFactory(org.apache.hyracks.api.dataflow.value.IMissingWriterFactory) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) MToNBroadcastConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) NestedLoopJoinOperatorDescriptor(org.apache.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Test(org.junit.Test)

Example 93 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class UnionTest method createUnionJobSpec.

public static JobSpecification createUnionJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();
    IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt"), new ManagedFileSplit(NC1_ID, "data" + File.separator + "nc1" + File.separator + "words.txt") });
    RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner01, NC2_ID, NC1_ID);
    FileScanOperatorDescriptor csvScanner02 = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner02, NC2_ID, NC1_ID);
    UnionAllOperatorDescriptor unionAll = new UnionAllOperatorDescriptor(spec, 2, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, unionAll, NC2_ID, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner01, 0, unionAll, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner02, 0, unionAll, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), unionAll, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}

Also used : IValueParserFactory(org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ResultWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) UnionAllOperatorDescriptor(org.apache.hyracks.dataflow.std.union.UnionAllOperatorDescriptor)

Example 94 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class JobManager method executeJob.

// Executes a job when the required capacity for the job is met.
private void executeJob(JobRun run) throws HyracksException {
    run.setStartTime(System.currentTimeMillis());
    JobId jobId = run.getJobId();
    activeRunMap.put(jobId, run);
    CCServiceContext serviceCtx = ccs.getContext();
    JobSpecification spec = run.getJobSpecification();
    if (!run.getExecutor().isPredistributed()) {
        serviceCtx.notifyJobCreation(jobId, spec);
    }
    run.setStatus(JobStatus.RUNNING, null);
    executeJobInternal(run);
}

Also used : JobSpecification(org.apache.hyracks.api.job.JobSpecification) CCServiceContext(org.apache.hyracks.control.cc.application.CCServiceContext) JobId(org.apache.hyracks.api.job.JobId)

Example 95 with JobSpecification

use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.

the class QueryTranslator method handleCreateIndexStatement.

protected void handleCreateIndexStatement(MetadataProvider metadataProvider, Statement stmt, IHyracksClientConnection hcc) throws Exception {
    ProgressState progress = ProgressState.NO_PROGRESS;
    CreateIndexStatement stmtCreateIndex = (CreateIndexStatement) stmt;
    String dataverseName = getActiveDataverse(stmtCreateIndex.getDataverseName());
    String datasetName = stmtCreateIndex.getDatasetName().getValue();
    List<Integer> keySourceIndicators = stmtCreateIndex.getFieldSourceIndicators();
    MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
    boolean bActiveTxn = true;
    metadataProvider.setMetadataTxnContext(mdTxnCtx);
    MetadataLockManager.INSTANCE.createIndexBegin(metadataProvider.getLocks(), dataverseName, dataverseName + "." + datasetName);
    String indexName = null;
    JobSpecification spec = null;
    Dataset ds = null;
    // For external datasets
    List<ExternalFile> externalFilesSnapshot = null;
    boolean firstExternalDatasetIndex = false;
    boolean filesIndexReplicated = false;
    Index filesIndex = null;
    boolean datasetLocked = false;
    Index index = null;
    try {
        ds = metadataProvider.findDataset(dataverseName, datasetName);
        if (ds == null) {
            throw new AlgebricksException("There is no dataset with this name " + datasetName + " in dataverse " + dataverseName);
        }
        indexName = stmtCreateIndex.getIndexName().getValue();
        index = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        Datatype dt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getItemTypeDataverseName(), ds.getItemTypeName());
        ARecordType aRecordType = (ARecordType) dt.getDatatype();
        ARecordType metaRecordType = null;
        if (ds.hasMetaPart()) {
            Datatype metaDt = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(), ds.getMetaItemTypeDataverseName(), ds.getMetaItemTypeName());
            metaRecordType = (ARecordType) metaDt.getDatatype();
        }
        List<List<String>> indexFields = new ArrayList<>();
        List<IAType> indexFieldTypes = new ArrayList<>();
        int keyIndex = 0;
        for (Pair<List<String>, TypeExpression> fieldExpr : stmtCreateIndex.getFieldExprs()) {
            IAType fieldType = null;
            ARecordType subType = KeyFieldTypeUtil.chooseSource(keySourceIndicators, keyIndex, aRecordType, metaRecordType);
            boolean isOpen = subType.isOpen();
            int i = 0;
            if (fieldExpr.first.size() > 1 && !isOpen) {
                while (i < fieldExpr.first.size() - 1 && !isOpen) {
                    subType = (ARecordType) subType.getFieldType(fieldExpr.first.get(i));
                    i++;
                    isOpen = subType.isOpen();
                }
            }
            if (fieldExpr.second == null) {
                fieldType = subType.getSubFieldType(fieldExpr.first.subList(i, fieldExpr.first.size()));
            } else {
                if (!stmtCreateIndex.isEnforced()) {
                    throw new AlgebricksException("Cannot create typed index on \"" + fieldExpr.first + "\" field without enforcing it's type");
                }
                if (!isOpen) {
                    throw new AlgebricksException("Typed index on \"" + fieldExpr.first + "\" field could be created only for open datatype");
                }
                if (stmtCreateIndex.hasMetaField()) {
                    throw new AlgebricksException("Typed open index can only be created on the record part");
                }
                Map<TypeSignature, IAType> typeMap = TypeTranslator.computeTypes(mdTxnCtx, fieldExpr.second, indexName, dataverseName);
                TypeSignature typeSignature = new TypeSignature(dataverseName, indexName);
                fieldType = typeMap.get(typeSignature);
            }
            if (fieldType == null) {
                throw new AlgebricksException("Unknown type " + (fieldExpr.second == null ? fieldExpr.first : fieldExpr.second));
            }
            indexFields.add(fieldExpr.first);
            indexFieldTypes.add(fieldType);
            ++keyIndex;
        }
        ValidateUtil.validateKeyFields(aRecordType, metaRecordType, indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getIndexType());
        if (index != null) {
            if (stmtCreateIndex.getIfNotExists()) {
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                return;
            } else {
                throw new AlgebricksException("An index with this name " + indexName + " already exists.");
            }
        }
        // error message and stop.
        if (stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || stmtCreateIndex.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
            List<List<String>> partitioningKeys = ds.getPrimaryKeys();
            for (List<String> partitioningKey : partitioningKeys) {
                IAType keyType = aRecordType.getSubFieldType(partitioningKey);
                ITypeTraits typeTrait = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
                // If it is not a fixed length
                if (typeTrait.getFixedLength() < 0) {
                    throw new AlgebricksException("The keyword or ngram index -" + indexName + " cannot be created on the dataset -" + datasetName + " due to its variable-length primary key field - " + partitioningKey);
                }
            }
        }
        if (ds.getDatasetType() == DatasetType.INTERNAL) {
            validateIfResourceIsActiveInFeed(ds);
        } else {
            // Check if the dataset is indexible
            if (!ExternalIndexingOperations.isIndexible((ExternalDatasetDetails) ds.getDatasetDetails())) {
                throw new AlgebricksException("dataset using " + ((ExternalDatasetDetails) ds.getDatasetDetails()).getAdapter() + " Adapter can't be indexed");
            }
            // Check if the name of the index is valid
            if (!ExternalIndexingOperations.isValidIndexName(datasetName, indexName)) {
                throw new AlgebricksException("external dataset index name is invalid");
            }
            // Check if the files index exist
            filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
            firstExternalDatasetIndex = filesIndex == null;
            // Lock external dataset
            ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
            datasetLocked = true;
            if (firstExternalDatasetIndex) {
                // Verify that no one has created an index before we acquire the lock
                filesIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                if (filesIndex != null) {
                    ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
                    firstExternalDatasetIndex = false;
                    ExternalDatasetsRegistry.INSTANCE.buildIndexBegin(ds, firstExternalDatasetIndex);
                }
            }
            if (firstExternalDatasetIndex) {
                // Get snapshot from External File System
                externalFilesSnapshot = ExternalIndexingOperations.getSnapshotFromExternalFileSystem(ds);
                // Add an entry for the files index
                filesIndex = new Index(dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName), IndexType.BTREE, ExternalIndexingOperations.FILE_INDEX_FIELD_NAMES, null, ExternalIndexingOperations.FILE_INDEX_FIELD_TYPES, false, false, MetadataUtil.PENDING_ADD_OP);
                MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
                // Add files to the external files index
                for (ExternalFile file : externalFilesSnapshot) {
                    MetadataManager.INSTANCE.addExternalFile(mdTxnCtx, file);
                }
                // This is the first index for the external dataset, replicate the files index
                spec = ExternalIndexingOperations.buildFilesIndexCreateJobSpec(ds, externalFilesSnapshot, metadataProvider);
                if (spec == null) {
                    throw new CompilationException("Failed to create job spec for replicating Files Index For external dataset");
                }
                filesIndexReplicated = true;
                JobUtils.runJob(hcc, spec, true);
            }
        }
        // check whether there exists another enforced index on the same field
        if (stmtCreateIndex.isEnforced()) {
            List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName);
            for (Index existingIndex : indexes) {
                if (existingIndex.getKeyFieldNames().equals(indexFields) && !existingIndex.getKeyFieldTypes().equals(indexFieldTypes) && existingIndex.isEnforcingKeyFileds()) {
                    throw new CompilationException("Cannot create index " + indexName + " , enforced index " + existingIndex.getIndexName() + " on field \"" + StringUtils.join(indexFields, ',') + "\" is already defined with type \"" + existingIndex.getKeyFieldTypes() + "\"");
                }
            }
        }
        // #. add a new index with PendingAddOp
        index = new Index(dataverseName, datasetName, indexName, stmtCreateIndex.getIndexType(), indexFields, keySourceIndicators, indexFieldTypes, stmtCreateIndex.getGramLength(), stmtCreateIndex.isEnforced(), false, MetadataUtil.PENDING_ADD_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // #. prepare to create the index artifact in NC.
        spec = IndexUtil.buildSecondaryIndexCreationJobSpec(ds, index, metadataProvider);
        if (spec == null) {
            throw new CompilationException("Failed to create job spec for creating index '" + stmtCreateIndex.getDatasetName() + "." + stmtCreateIndex.getIndexName() + "'");
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        progress = ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA;
        // #. create the index artifact in NC.
        JobUtils.runJob(hcc, spec, true);
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. load data into the index in NC.
        spec = IndexUtil.buildSecondaryIndexLoadingJobSpec(ds, index, metadataProvider);
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
        bActiveTxn = false;
        JobUtils.runJob(hcc, spec, true);
        // #. begin new metadataTxn
        mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        bActiveTxn = true;
        metadataProvider.setMetadataTxnContext(mdTxnCtx);
        // #. add another new index with PendingNoOp after deleting the index with PendingAddOp
        MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
        index.setPendingOp(MetadataUtil.PENDING_NO_OP);
        MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), index);
        // PendingAddOp
        if (firstExternalDatasetIndex) {
            MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, filesIndex.getIndexName());
            filesIndex.setPendingOp(MetadataUtil.PENDING_NO_OP);
            MetadataManager.INSTANCE.addIndex(metadataProvider.getMetadataTxnContext(), filesIndex);
            // update transaction timestamp
            ((ExternalDatasetDetails) ds.getDatasetDetails()).setRefreshTimestamp(new Date());
            MetadataManager.INSTANCE.updateDataset(mdTxnCtx, ds);
        }
        MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
    } catch (Exception e) {
        if (bActiveTxn) {
            abort(e, e, mdTxnCtx);
        }
        // If files index was replicated for external dataset, it should be cleaned up on NC side
        if (filesIndexReplicated) {
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            try {
                JobSpecification jobSpec = ExternalIndexingOperations.buildDropFilesIndexJobSpec(metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
        }
        if (progress == ProgressState.ADDED_PENDINGOP_RECORD_TO_METADATA) {
            // #. execute compensation operations
            // remove the index in NC
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            bActiveTxn = true;
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                JobSpecification jobSpec = IndexUtil.buildDropIndexJobSpec(index, metadataProvider, ds);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                bActiveTxn = false;
                JobUtils.runJob(hcc, jobSpec, true);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                if (bActiveTxn) {
                    abort(e, e2, mdTxnCtx);
                }
            }
            if (firstExternalDatasetIndex) {
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop External Files from metadata
                    MetadataManager.INSTANCE.dropDatasetExternalFiles(mdTxnCtx, ds);
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending files for(" + dataverseName + "." + datasetName + ") couldn't be removed from the metadata", e);
                }
                mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
                metadataProvider.setMetadataTxnContext(mdTxnCtx);
                try {
                    // Drop the files index from metadata
                    MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, IndexingConstants.getFilesIndexName(datasetName));
                    MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
                } catch (Exception e2) {
                    e.addSuppressed(e2);
                    abort(e, e2, mdTxnCtx);
                    throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + IndexingConstants.getFilesIndexName(datasetName) + ") couldn't be removed from the metadata", e);
                }
            }
            // remove the record from the metadata.
            mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            try {
                MetadataManager.INSTANCE.dropIndex(metadataProvider.getMetadataTxnContext(), dataverseName, datasetName, indexName);
                MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            } catch (Exception e2) {
                e.addSuppressed(e2);
                abort(e, e2, mdTxnCtx);
                throw new IllegalStateException("System is in inconsistent state: pending index(" + dataverseName + "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
            }
        }
        throw e;
    } finally {
        metadataProvider.getLocks().unlock();
        if (datasetLocked) {
            ExternalDatasetsRegistry.INSTANCE.buildIndexEnd(ds, firstExternalDatasetIndex);
        }
    }
}

Also used : ProgressState(org.apache.asterix.common.utils.JobUtils.ProgressState) ArrayList(java.util.ArrayList) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) Index(org.apache.asterix.metadata.entities.Index) Datatype(org.apache.asterix.metadata.entities.Datatype) TypeSignature(org.apache.asterix.om.types.TypeSignature) ExternalDatasetDetails(org.apache.asterix.metadata.entities.ExternalDatasetDetails) ArrayList(java.util.ArrayList) List(java.util.List) JobSpecification(org.apache.hyracks.api.job.JobSpecification) CompilationException(org.apache.asterix.common.exceptions.CompilationException) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IHyracksDataset(org.apache.hyracks.api.dataset.IHyracksDataset) IDataset(org.apache.asterix.common.metadata.IDataset) Dataset(org.apache.asterix.metadata.entities.Dataset) TypeExpression(org.apache.asterix.lang.common.expression.TypeExpression) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CreateIndexStatement(org.apache.asterix.lang.common.statement.CreateIndexStatement) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) DatasetNodegroupCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetNodegroupCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Date(java.util.Date) ACIDException(org.apache.asterix.common.exceptions.ACIDException) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) RemoteException(java.rmi.RemoteException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)

Aggregations

JobSpecification (org.apache.hyracks.api.job.JobSpecification)182 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)90 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)88 Test (org.junit.Test)82 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)77 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)67 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)61 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)59 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)59 IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)45 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)45 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)41 FileSplit (org.apache.hyracks.api.io.FileSplit)40 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)38 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)35 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)29 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)29 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)26 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)24 ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor)23