Example 36 with AlgebricksPartitionConstraint

Use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

From class DatasetUtil, the method buildDropFilesIndexJobSpec:

public static JobSpecification buildDropFilesIndexJobSpec(MetadataProvider metadataProvider, Dataset dataset) throws AlgebricksException {
    String indexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
Also used: IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), AMutableString (org.apache.asterix.om.base.AMutableString), AString (org.apache.asterix.om.base.AString), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), IndexDropOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.IndexDropOperatorDescriptor)
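
A spec like this is only a plan until a client submits it. A minimal sketch of the submission step, assuming hcc is a hypothetical, already-connected org.apache.hyracks.api.client.IHyracksClientConnection and metadataProvider/dataset are initialized as in the example:

// Minimal sketch; hcc, metadataProvider, and dataset are assumed to be initialized elsewhere.
JobSpecification dropSpec = DatasetUtil.buildDropFilesIndexJobSpec(metadataProvider, dataset);
org.apache.hyracks.api.job.JobId hyracksJobId = hcc.startJob(dropSpec); // submit to the cluster controller
hcc.waitForCompletion(hyracksJobId); // block until the files index is dropped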

Example 37 with AlgebricksPartitionConstraint

Use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

From class DatasetUtil, the method createPrimaryIndexUpsertOp:

/**
     * Creates a primary index upsert operator for a given dataset.
     *
     * @param spec
     *            the job specification.
     * @param metadataProvider
     *            the metadata provider.
     * @param dataset
     *            the dataset to upsert into.
     * @param inputRecordDesc
     *            the record descriptor for an input tuple.
     * @param fieldPermutation
     *            the field permutation according to the input.
     * @param missingWriterFactory
     *            the factory for customizing missing value serialization.
     * @return a primary index upsert operator and its location constraints.
     * @throws AlgebricksException
     */
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, RecordDescriptor inputRecordDesc, int[] fieldPermutation, IMissingWriterFactory missingWriterFactory) throws AlgebricksException {
    int numKeys = dataset.getPrimaryKeys().size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    try {
        Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        int[] primaryKeyFields = new int[numKeys];
        for (int i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        boolean hasSecondaries = metadataProvider.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
        IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
        LSMPrimaryUpsertOperatorDescriptor op;
        ITypeTraits[] outputTypeTraits = new ITypeTraits[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        // add the previous record first
        int f = 0;
        outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
        f++;
        // add the previous meta second
        if (dataset.hasMetaPart()) {
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
            f++;
        }
        // add the previous filter third
        int fieldIdx = -1;
        if (numFilterFields > 0) {
            String filterField = DatasetUtil.getFilterField(dataset).get(0);
            String[] fieldNames = itemType.getFieldNames();
            int i = 0;
            for (; i < fieldNames.length; i++) {
                if (fieldNames[i].equals(filterField)) {
                    break;
                }
            }
            fieldIdx = i;
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
            f++;
        }
        for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
            outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
            outputSerDes[j + f] = inputRecordDesc.getFields()[j];
        }
        RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
        op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh, missingWriterFactory, modificationCallbackFactory, searchCallbackFactory, dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
Also used: LSMPrimaryUpsertOperatorDescriptor (org.apache.asterix.runtime.operators.LSMPrimaryUpsertOperatorDescriptor), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), Index (org.apache.asterix.metadata.entities.Index), AMutableString (org.apache.asterix.om.base.AMutableString), AString (org.apache.asterix.om.base.AString), MetadataException (org.apache.asterix.metadata.MetadataException), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), JobId (org.apache.asterix.common.transactions.JobId), Pair (org.apache.hyracks.algebricks.common.utils.Pair), IStorageComponentProvider (org.apache.asterix.common.context.IStorageComponentProvider), ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits), AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException), JobEventListenerFactory (org.apache.asterix.runtime.job.listener.JobEventListenerFactory), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), ISearchOperationCallbackFactory (org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory), IModificationOperationCallbackFactory (org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory), ARecordType (org.apache.asterix.om.types.ARecordType)
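
Because this helper returns the operator paired with its location constraint instead of registering it, the caller does the wiring. A sketch under stated assumptions: assignOp is a hypothetical upstream operator already added to spec, and the connector is org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor:

// Sketch; assignOp is hypothetical and not part of the example above.
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertAndConstraint =
        DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, dataset,
                inputRecordDesc, fieldPermutation, missingWriterFactory);
IOperatorDescriptor upsertOp = upsertAndConstraint.first;
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, upsertOp, upsertAndConstraint.second);
spec.connect(new OneToOneConnectorDescriptor(spec), assignOp, 0, upsertOp, 0);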

Example 38 with AlgebricksPartitionConstraint

Use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

From class DatasetUtil, the method createPrimaryIndexScanOp:

/**
     * Creates a primary index scan operator for a given dataset.
     *
     * @param spec
     *            the job specification.
     * @param metadataProvider
     *            the metadata provider.
     * @param dataset
     *            the dataset to scan.
     * @param jobId
     *            the AsterixDB job id for transaction management.
     * @return a primary index scan operator.
     * @throws AlgebricksException
     */
public static IOperatorDescriptor createPrimaryIndexScanOp(JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, JobId jobId) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IFileSplitProvider primaryFileSplitProvider = primarySplitsAndConstraint.first;
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    int[] lowKeyFields = null; // null low key = -infinity (scan from the first key)
    int[] highKeyFields = null; // null high key = +infinity (scan through the last key)
    ITransactionSubsystemProvider txnSubsystemProvider = TransactionSubsystemProvider.INSTANCE;
    boolean temp = dataset.getDatasetDetails().isTemp();
    ISearchOperationCallbackFactory searchCallbackFactory = temp ? NoOpOperationCallbackFactory.INSTANCE : new PrimaryIndexInstantSearchOperationCallbackFactory(jobId, dataset.getDatasetId(), dataset.getPrimaryBloomFilterFields(), txnSubsystemProvider, IRecoveryManager.ResourceType.LSM_BTREE);
    IndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, dataset.getPrimaryRecordDescriptor(metadataProvider), lowKeyFields, highKeyFields, true, true, indexHelperFactory, false, false, null, searchCallbackFactory, null, null, false);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primarySearchOp, primaryPartitionConstraint);
    return primarySearchOp;
}
Also used: PrimaryIndexInstantSearchOperationCallbackFactory (org.apache.asterix.transaction.management.opcallbacks.PrimaryIndexInstantSearchOperationCallbackFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), ITransactionSubsystemProvider (org.apache.asterix.common.context.ITransactionSubsystemProvider), BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), ISearchOperationCallbackFactory (org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory)
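
Unlike the upsert helper above, this method registers the partition constraint itself and returns only the operator, so a caller just wires it downstream. A sketch that reuses the JobEventListenerFactory pattern from Example 37 to recover the transaction JobId; sinkOp is a hypothetical consumer operator:

// Sketch; sinkOp is hypothetical and assumed already added to the spec.
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IOperatorDescriptor scanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
spec.connect(new OneToOneConnectorDescriptor(spec), scanOp, 0, sinkOp, 0);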

Example 39 with AlgebricksPartitionConstraint

Use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

From class DatasetUtil, the method compactDatasetJobSpec:

public static JobSpecification compactDatasetJobSpec(Dataverse dataverse, String datasetName, MetadataProvider metadataProvider) throws AlgebricksException {
    String dataverseName = dataverse.getDataverseName();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AsterixException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, splitsAndConstraint.second);
    spec.addRoot(compactOp);
    return spec;
}
Also used: LSMTreeIndexCompactOperatorDescriptor (org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor), AsterixException (org.apache.asterix.common.exceptions.AsterixException), Dataset (org.apache.asterix.metadata.entities.Dataset), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), AMutableString (org.apache.asterix.om.base.AMutableString), AString (org.apache.asterix.om.base.AString), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
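
A compaction spec built this way still has to be run. A minimal sketch of one way to do that, assuming an initialized IHyracksClientConnection hcc and an illustrative dataset name; JobUtils here refers to org.apache.asterix.common.utils.JobUtils:

// Sketch; "Customers" is an illustrative dataset name, hcc is assumed connected.
JobSpecification compactSpec = DatasetUtil.compactDatasetJobSpec(dataverse, "Customers", metadataProvider);
JobUtils.runJob(hcc, compactSpec, true); // true = wait for the compaction job to finish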

Example 40 with AlgebricksPartitionConstraint

Use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

From class DatasetUtil, the method createDatasetJobSpec:

public static JobSpecification createDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider) throws AlgebricksException {
    Index index = IndexUtil.getPrimaryIndex(dataset);
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    // get meta item type
    ARecordType metaItemType = null;
    if (dataset.hasMetaPart()) {
        metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    FileSplit[] fs = splitsAndConstraint.first.getFileSplits();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < fs.length; i++) {
        sb.append(fs[i]).append(' ');
    }
    LOGGER.info("CREATING File Splits: " + sb.toString());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo = DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    // prepare a LocalResourceMetadata which will be stored in the NC's local resource repository
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaItemType, compactionInfo.first, compactionInfo.second);
    IndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first, resourceFactory, !dataset.isTemp());
    IndexCreateOperatorDescriptor indexCreateOp = new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexCreateOp, splitsAndConstraint.second);
    spec.addRoot(indexCreateOp);
    return spec;
}
Also used: IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IndexBuilderFactory (org.apache.hyracks.storage.am.common.build.IndexBuilderFactory), Index (org.apache.asterix.metadata.entities.Index), FileSplit (org.apache.hyracks.api.io.FileSplit), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), JobSpecification (org.apache.hyracks.api.job.JobSpecification), ARecordType (org.apache.asterix.om.types.ARecordType), Map (java.util.Map), IResourceFactory (org.apache.hyracks.storage.common.IResourceFactory), ILSMMergePolicyFactory (org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory), IndexCreateOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.IndexCreateOperatorDescriptor)
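
For context on the constraint type that every example above threads through: AlgebricksPartitionConstraint comes in two concrete flavors, and the getSplitProviderAndConstraints lookup used in these examples yields the absolute kind, whose locations line up one-to-one with the file splits. A minimal sketch with illustrative node controller names:

// Minimal sketch; "nc1"/"nc2" are illustrative node controller names.
AlgebricksPartitionConstraint absolute =
        new AlgebricksAbsolutePartitionConstraint(new String[] { "nc1", "nc1", "nc2" });
AlgebricksPartitionConstraint counted = new AlgebricksCountPartitionConstraint(4); // only fixes the partition count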

Aggregations

AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 58 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 30 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 23 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 23 usages
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory): 22 usages
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory): 22 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 20 usages
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 19 usages
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 19 usages
Index (org.apache.asterix.metadata.entities.Index): 15 usages
MetadataException (org.apache.asterix.metadata.MetadataException): 14 usages
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint): 14 usages
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 14 usages
Dataset (org.apache.asterix.metadata.entities.Dataset): 12 usages
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable): 11 usages
IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment): 10 usages
DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint): 9 usages
AsterixException (org.apache.asterix.common.exceptions.AsterixException): 8 usages
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex): 8 usages
IMetadataProvider (org.apache.hyracks.algebricks.core.algebra.metadata.IMetadataProvider): 8 usages