Search in sources :

Example 26 with IndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

From the class DatasetUtil, method createPrimaryIndexUpsertOp.

/**
     * Creates a primary index upsert operator for a given dataset.
     *
     * @param spec
     *            the job specification.
     * @param metadataProvider
     *            the metadata provider.
     * @param dataset
     *            the dataset to upsert.
     * @param inputRecordDesc
     *            the record descriptor for an input tuple.
     * @param fieldPermutation
     *            the field permutation according to the input.
     * @param missingWriterFactory
     *            the factory for customizing missing value serialization.
     * @return a primary index upsert operator and its location constraints.
     * @throws AlgebricksException
     */
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, RecordDescriptor inputRecordDesc, int[] fieldPermutation, IMissingWriterFactory missingWriterFactory) throws AlgebricksException {
    int numKeys = dataset.getPrimaryKeys().size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    try {
        Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        int[] primaryKeyFields = new int[numKeys];
        for (int i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        boolean hasSecondaries = metadataProvider.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
        IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
        LSMPrimaryUpsertOperatorDescriptor op;
        ITypeTraits[] outputTypeTraits = new ITypeTraits[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        // add the previous record first
        int f = 0;
        outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
        // set the type trait as well, mirroring the meta and filter fields below
        outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(itemType);
        f++;
        // add the previous meta second
        if (dataset.hasMetaPart()) {
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
            f++;
        }
        // add the previous filter third
        int fieldIdx = -1;
        if (numFilterFields > 0) {
            String filterField = DatasetUtil.getFilterField(dataset).get(0);
            String[] fieldNames = itemType.getFieldNames();
            int i = 0;
            for (; i < fieldNames.length; i++) {
                if (fieldNames[i].equals(filterField)) {
                    break;
                }
            }
            fieldIdx = i;
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
            f++;
        }
        for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
            outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
            outputSerDes[j + f] = inputRecordDesc.getFields()[j];
        }
        RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
        op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh, missingWriterFactory, modificationCallbackFactory, searchCallbackFactory, dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
Also used : LSMPrimaryUpsertOperatorDescriptor(org.apache.asterix.runtime.operators.LSMPrimaryUpsertOperatorDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) Index(org.apache.asterix.metadata.entities.Index) AMutableString(org.apache.asterix.om.base.AMutableString) AString(org.apache.asterix.om.base.AString) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) JobId(org.apache.asterix.common.transactions.JobId) Pair(org.apache.hyracks.algebricks.common.utils.Pair) IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) ISearchOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IModificationOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory) ARecordType(org.apache.asterix.om.types.ARecordType)
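
For orientation, a minimal wiring sketch (assumed, not from the AsterixDB source): the returned pair carries the operator descriptor and its partition constraint, which the caller still has to register on the job specification, as the other examples on this page do via AlgebricksPartitionConstraintHelper. The variables spec, metadataProvider, dataset, inputRecordDesc, fieldPermutation, and missingWriterFactory are assumed to be in scope.

Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertAndConstraint =
        DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, dataset,
                inputRecordDesc, fieldPermutation, missingWriterFactory);
// register the operator's location constraint on the job, mirroring the
// scan and compact examples below
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec,
        upsertAndConstraint.first, upsertAndConstraint.second);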

Example 27 with IndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

From the class DatasetUtil, method createPrimaryIndexScanOp.

/**
     * Creates a primary index scan operator for a given dataset.
     *
     * @param spec
     *            the job specification.
     * @param metadataProvider
     *            the metadata provider.
     * @param dataset
     *            the dataset to scan.
     * @param jobId
     *            the AsterixDB job id for transaction management.
     * @return a primary index scan operator.
     * @throws AlgebricksException
     */
public static IOperatorDescriptor createPrimaryIndexScanOp(JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, JobId jobId) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IFileSplitProvider primaryFileSplitProvider = primarySplitsAndConstraint.first;
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    // a null low-key field array leaves the scan unbounded below (-Infinity)
    int[] lowKeyFields = null;
    // a null high-key field array leaves the scan unbounded above (+Infinity)
    int[] highKeyFields = null;
    ITransactionSubsystemProvider txnSubsystemProvider = TransactionSubsystemProvider.INSTANCE;
    boolean temp = dataset.getDatasetDetails().isTemp();
    ISearchOperationCallbackFactory searchCallbackFactory = temp ? NoOpOperationCallbackFactory.INSTANCE : new PrimaryIndexInstantSearchOperationCallbackFactory(jobId, dataset.getDatasetId(), dataset.getPrimaryBloomFilterFields(), txnSubsystemProvider, IRecoveryManager.ResourceType.LSM_BTREE);
    IndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, dataset.getPrimaryRecordDescriptor(metadataProvider), lowKeyFields, highKeyFields, true, true, indexHelperFactory, false, false, null, searchCallbackFactory, null, null, false);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primarySearchOp, primaryPartitionConstraint);
    return primarySearchOp;
}
Also used : PrimaryIndexInstantSearchOperationCallbackFactory(org.apache.asterix.transaction.management.opcallbacks.PrimaryIndexInstantSearchOperationCallbackFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ITransactionSubsystemProvider(org.apache.asterix.common.context.ITransactionSubsystemProvider) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ISearchOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory)
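
A hedged usage sketch, assuming spec, metadataProvider, dataset, and jobId are in scope and downstreamOp is a hypothetical consumer operator: with both key-field arrays null, the B-tree search degenerates to a full scan, and its output can be piped to a consumer over a one-to-one connector.

IOperatorDescriptor primaryScanOp =
        DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
// null low/high keys make this a full scan; feed every record to the
// (hypothetical) downstream operator on the same partition
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, downstreamOp, 0);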

Example 28 with IndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

From the class DatasetUtil, method compactDatasetJobSpec.

public static JobSpecification compactDatasetJobSpec(Dataverse dataverse, String datasetName, MetadataProvider metadataProvider) throws AlgebricksException {
    String dataverseName = dataverse.getDataverseName();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AsterixException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, splitsAndConstraint.second);
    spec.addRoot(compactOp);
    return spec;
}
Also used : LSMTreeIndexCompactOperatorDescriptor(org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor) AsterixException(org.apache.asterix.common.exceptions.AsterixException) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AMutableString(org.apache.asterix.om.base.AMutableString) AString(org.apache.asterix.om.base.AString) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
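
A sketch of how the resulting job might be launched, assuming hcc is an org.apache.hyracks.api.client.IHyracksClientConnection obtained elsewhere; the dataset name is purely illustrative.

JobSpecification compactSpec =
        DatasetUtil.compactDatasetJobSpec(dataverse, "LineItem", metadataProvider);
// submit the single-root compaction job to the cluster controller
hcc.startJob(compactSpec);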

Example 29 with IndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

From the class ExternalIndexingOperations, method buildFilesIndexUpdateJobSpec.

public static JobSpecification buildFilesIndexUpdateJobSpec(Dataset dataset, List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexModificationOperatorDescriptor externalFilesOp = new ExternalFilesIndexModificationOperatorDescriptor(spec, dataflowHelperFactory, externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp, secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ExternalFilesIndexModificationOperatorDescriptor(org.apache.asterix.external.operators.ExternalFilesIndexModificationOperatorDescriptor)
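
A minimal sketch under stated assumptions: externalFilesSnapshot is a point-in-time list of ExternalFile metadata entries, left empty here purely for illustration; in practice it would be assembled from the external dataset's metadata. hcc is again an assumed IHyracksClientConnection.

List<ExternalFile> snapshot = new ArrayList<>(); // illustrative; normally built from metadata
JobSpecification updateSpec =
        ExternalIndexingOperations.buildFilesIndexUpdateJobSpec(dataset, snapshot, metadataProvider);
hcc.startJob(updateSpec);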

Example 30 with IndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

From the class ExternalIndexingOperations, method buildRecoverOp.

public static JobSpecification buildRecoverOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints = metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesRecoverOperatorDescriptor op = new ExternalDatasetIndexesRecoverOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ExternalDatasetIndexesRecoverOperatorDescriptor(org.apache.asterix.external.operators.ExternalDatasetIndexesRecoverOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ArrayList(java.util.ArrayList) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Index(org.apache.asterix.metadata.entities.Index) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
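
A plausible call site, sketched under the assumption that ds and metadataProvider are in scope; getDatasetIndexes is the same MetadataProvider method already used in Example 26 above.

List<Index> indexes =
        metadataProvider.getDatasetIndexes(ds.getDataverseName(), ds.getDatasetName());
JobSpecification recoverSpec =
        ExternalIndexingOperations.buildRecoverOp(ds, indexes, metadataProvider);
hcc.startJob(recoverSpec); // hcc: assumed IHyracksClientConnection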

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory): 34
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory): 34
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 27
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 22
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 19
Index (org.apache.asterix.metadata.entities.Index): 13
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 11
MetadataException (org.apache.asterix.metadata.MetadataException): 10
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 10
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 10
IStorageManager (org.apache.hyracks.storage.common.IStorageManager): 9
ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy): 8
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor): 8
DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint): 7
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex): 7
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits): 7
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 7
JobId (org.apache.asterix.common.transactions.JobId): 6
Dataset (org.apache.asterix.metadata.entities.Dataset): 6
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint): 6