Examples with IndexDataflowHelperFactory - org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory

Example 31 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class ExternalIndexingOperations method buildCommitJob.

public static JobSpecification buildCommitJob(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints = metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesCommitOperatorDescriptor op = new ExternalDatasetIndexesCommitOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}

Also used : IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ExternalDatasetIndexesCommitOperatorDescriptor(org.apache.asterix.external.operators.ExternalDatasetIndexesCommitOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ArrayList(java.util.ArrayList) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Index(org.apache.asterix.metadata.entities.Index) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 32 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class ExternalIndexingOperations method buildFilesIndexCreateJobSpec.

public static JobSpecification buildFilesIndexCreateJobSpec(Dataset dataset, List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo = DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
    Map<String, String> mergePolicyProperties = compactionInfo.second;
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    Index fileIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(), dataset.getDatasetName(), fileIndexName);
    ARecordType recordType = (ARecordType) metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, fileIndex, recordType, null, mergePolicyFactory, mergePolicyProperties);
    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider, resourceFactory, !dataset.isTemp());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexCreateOperatorDescriptor externalFilesOp = new ExternalFilesIndexCreateOperatorDescriptor(spec, indexBuilderFactory, dataflowHelperFactory, externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp, secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}

Also used : IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IndexBuilderFactory(org.apache.hyracks.storage.am.common.build.IndexBuilderFactory) Index(org.apache.asterix.metadata.entities.Index) ExternalFilesIndexCreateOperatorDescriptor(org.apache.asterix.external.operators.ExternalFilesIndexCreateOperatorDescriptor) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Map(java.util.Map) ARecordType(org.apache.asterix.om.types.ARecordType) IResourceFactory(org.apache.hyracks.storage.common.IResourceFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ILSMMergePolicyFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory)

Example 33 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class ExternalIndexingOperations method buildDropFilesIndexJobSpec.

public static JobSpecification buildDropFilesIndexJobSpec(MetadataProvider metadataProvider, Dataset dataset) throws AlgebricksException {
    String indexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}

Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IndexDropOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.IndexDropOperatorDescriptor)

Example 34 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class MetadataProvider method buildRtreeRuntime.

public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildRtreeRuntime(JobSpecification jobSpec, List<LogicalVariable> outputVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, boolean retainInput, boolean retainMissing, Dataset dataset, String indexName, int[] keyFields, int[] minFilterFieldIndexes, int[] maxFilterFieldIndexes) throws AlgebricksException {
    try {
        int numPrimaryKeys = dataset.getPrimaryKeys().size();
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        if (secondaryIndex == null) {
            throw new AlgebricksException("Code generation error: no index " + indexName + " for dataset " + dataset.getDatasetName());
        }
        RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
        int[] primaryKeyFields = new int[numPrimaryKeys];
        for (int i = 0; i < numPrimaryKeys; i++) {
            primaryKeyFields[i] = i;
        }
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, IndexOperation.SEARCH, primaryKeyFields);
        RTreeSearchOperatorDescriptor rtreeSearchOp;
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), spPc.first);
        if (dataset.getDatasetType() == DatasetType.INTERNAL) {
            rtreeSearchOp = new RTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, keyFields, true, true, indexDataflowHelperFactory, retainInput, retainMissing, context.getMissingWriterFactory(), searchCallbackFactory, minFilterFieldIndexes, maxFilterFieldIndexes, false);
        } else {
            // Create the operator
            rtreeSearchOp = new ExternalRTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, keyFields, true, true, indexDataflowHelperFactory, retainInput, retainMissing, context.getMissingWriterFactory(), searchCallbackFactory, minFilterFieldIndexes, maxFilterFieldIndexes, ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this));
        }
        return new Pair<>(rtreeSearchOp, spPc.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}

Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) ExternalRTreeSearchOperatorDescriptor(org.apache.asterix.external.operators.ExternalRTreeSearchOperatorDescriptor) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) DatasetCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) ISearchOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) RTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor) ExternalRTreeSearchOperatorDescriptor(org.apache.asterix.external.operators.ExternalRTreeSearchOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)34 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)34 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)22 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 Index (org.apache.asterix.metadata.entities.Index)13 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)11 MetadataException (org.apache.asterix.metadata.MetadataException)10 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)10 Pair (org.apache.hyracks.algebricks.common.utils.Pair)10 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)9 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)8 TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)8 DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)7 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)7 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)7 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)7 JobId (org.apache.asterix.common.transactions.JobId)6 Dataset (org.apache.asterix.metadata.entities.Dataset)6 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)6