Search in sources:

Example 26 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

Method compactDatasetJobSpec of class DatasetUtil.

/**
 * Builds a job specification whose single root operator is an
 * {@code LSMTreeIndexCompactOperatorDescriptor} created for the named dataset.
 *
 * @param dataverse the dataverse whose name is used to look the dataset up
 * @param datasetName the name of the dataset to look up
 * @param metadataProvider source of the dataset lookup, the application context,
 *        the storage manager and the dataset's file splits / partition constraint
 * @return the job specification with the compact operator added as its root
 * @throws AlgebricksException declared by this method; an {@code AsterixException}
 *         is thrown when {@code metadataProvider.findDataset} returns {@code null}
 */
public static JobSpecification compactDatasetJobSpec(Dataverse dataverse, String datasetName, MetadataProvider metadataProvider) throws AlgebricksException {
    String dataverseName = dataverse.getDataverseName();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AsterixException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp =
            new LSMTreeIndexCompactOperatorDescriptor(spec, indexHelperFactory);
    // The partition constraint is registered exactly once; the previous code
    // repeated this call verbatim with the same arguments.
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp,
            splitsAndConstraint.second);
    spec.addRoot(compactOp);
    return spec;
}
Also used : LSMTreeIndexCompactOperatorDescriptor(org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor) AsterixException(org.apache.asterix.common.exceptions.AsterixException) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AMutableString(org.apache.asterix.om.base.AMutableString) AString(org.apache.asterix.om.base.AString) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 27 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

Method buildFilesIndexUpdateJobSpec of class ExternalIndexingOperations.

/**
 * Builds a job specification whose root is an
 * {@code ExternalFilesIndexModificationOperatorDescriptor} constructed from the
 * dataset's files index splits and the supplied external-files snapshot.
 *
 * @param dataset the dataset whose files index (named via
 *        {@code IndexingConstants.getFilesIndexName}) is targeted
 * @param externalFilesSnapshot the list handed to the modification operator
 * @param metadataProvider source of the storage component provider, application
 *        context and split/constraint lookup
 * @return the assembled job specification
 * @throws AlgebricksException declared by this method and the metadata calls it makes
 */
public static JobSpecification buildFilesIndexUpdateJobSpec(Dataset dataset, List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider componentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification jobSpec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());

    // Look up the splits and partition constraint of the dataset's files index.
    String filesIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> filesIndexSplits =
            metadataProvider.getSplitProviderAndConstraints(dataset, filesIndexName);

    IIndexDataflowHelperFactory helperFactory =
            new IndexDataflowHelperFactory(componentProvider.getStorageManager(), filesIndexSplits.first);
    ExternalFilesIndexModificationOperatorDescriptor modificationOp =
            new ExternalFilesIndexModificationOperatorDescriptor(jobSpec, helperFactory, externalFilesSnapshot);

    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, modificationOp,
            filesIndexSplits.second);
    jobSpec.addRoot(modificationOp);
    jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
Also used : IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ExternalFilesIndexModificationOperatorDescriptor(org.apache.asterix.external.operators.ExternalFilesIndexModificationOperatorDescriptor)

Example 28 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

Method buildRecoverOp of class ExternalIndexingOperations.

/**
 * Builds a job specification whose root is an
 * {@code ExternalDatasetIndexesRecoverOperatorDescriptor} given one
 * {@code IndexDataflowHelperFactory} per entry of {@code indexes}.
 *
 * <p>For an index accepted by {@code isValidIndexName}, the splits are looked up
 * under the index's own name and its partition constraint is remembered; otherwise
 * the splits of the dataset's files index are used and the remembered constraint is
 * left untouched. The last remembered constraint (or {@code null} if none was
 * recorded) is passed to {@code setPartitionConstraintInJobSpec}.
 *
 * @param ds the dataset the indexes belong to
 * @param indexes the indexes to create helper factories for
 * @param metadataProvider source of the application context, storage manager and
 *        split/constraint lookup
 * @return the assembled job specification
 * @throws AlgebricksException declared by this method and the metadata calls it makes
 */
public static JobSpecification buildRecoverOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification jobSpec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageManager = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> helperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint partitionConstraint = null;
    for (Index idx : indexes) {
        boolean namedIndex = isValidIndexName(idx.getDatasetName(), idx.getIndexName());
        String lookupName = namedIndex
                ? idx.getIndexName()
                : IndexingConstants.getFilesIndexName(ds.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(ds, lookupName);
        if (namedIndex) {
            // Only indexes that pass the name check contribute the partition constraint.
            partitionConstraint = splitsAndConstraint.second;
        }
        helperFactories.add(new IndexDataflowHelperFactory(storageManager, splitsAndConstraint.first));
    }
    ExternalDatasetIndexesRecoverOperatorDescriptor recoverOp =
            new ExternalDatasetIndexesRecoverOperatorDescriptor(jobSpec, helperFactories);
    jobSpec.addRoot(recoverOp);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, recoverOp, partitionConstraint);
    jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
Also used : IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ExternalDatasetIndexesRecoverOperatorDescriptor(org.apache.asterix.external.operators.ExternalDatasetIndexesRecoverOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ArrayList(java.util.ArrayList) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Index(org.apache.asterix.metadata.entities.Index) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 29 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

Method buildCommitJob of class ExternalIndexingOperations.

/**
 * Builds a job specification whose root is an
 * {@code ExternalDatasetIndexesCommitOperatorDescriptor} given one
 * {@code IndexDataflowHelperFactory} per entry of {@code indexes}.
 *
 * <p>For an index accepted by {@code isValidIndexName}, the splits are looked up
 * under the index's own name and its partition constraint is remembered; otherwise
 * the splits of the dataset's files index are used and the remembered constraint is
 * left untouched. The last remembered constraint (or {@code null} if none was
 * recorded) is passed to {@code setPartitionConstraintInJobSpec}.
 *
 * @param ds the dataset the indexes belong to
 * @param indexes the indexes to create helper factories for
 * @param metadataProvider source of the application context, storage manager and
 *        split/constraint lookup
 * @return the assembled job specification
 * @throws AlgebricksException declared by this method and the metadata calls it makes
 */
public static JobSpecification buildCommitJob(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification jobSpec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageManager = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> helperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint partitionConstraint = null;
    for (Index idx : indexes) {
        boolean namedIndex = isValidIndexName(idx.getDatasetName(), idx.getIndexName());
        String lookupName = namedIndex
                ? idx.getIndexName()
                : IndexingConstants.getFilesIndexName(ds.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(ds, lookupName);
        if (namedIndex) {
            // Only indexes that pass the name check contribute the partition constraint.
            partitionConstraint = splitsAndConstraint.second;
        }
        helperFactories.add(new IndexDataflowHelperFactory(storageManager, splitsAndConstraint.first));
    }
    ExternalDatasetIndexesCommitOperatorDescriptor commitOp =
            new ExternalDatasetIndexesCommitOperatorDescriptor(jobSpec, helperFactories);
    jobSpec.addRoot(commitOp);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, commitOp, partitionConstraint);
    jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return jobSpec;
}
Also used : IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ExternalDatasetIndexesCommitOperatorDescriptor(org.apache.asterix.external.operators.ExternalDatasetIndexesCommitOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ArrayList(java.util.ArrayList) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Index(org.apache.asterix.metadata.entities.Index) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 30 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

Method buildFilesIndexCreateJobSpec of class ExternalIndexingOperations.

/**
 * Builds a job specification whose root is an
 * {@code ExternalFilesIndexCreateOperatorDescriptor} for the dataset's files index.
 *
 * <p>The operator is given an {@code IndexBuilderFactory} (constructed from the
 * storage manager, the files index splits, a resource factory obtained from the
 * dataset, and {@code !dataset.isTemp()}), an {@code IndexDataflowHelperFactory}
 * over the same splits, and the external-files snapshot.
 *
 * @param dataset the dataset whose files index is being created
 * @param externalFilesSnapshot the list handed to the create operator
 * @param metadataProvider source of the storage component provider, application
 *        context, metadata transaction context, type lookup and split/constraint lookup
 * @return the assembled job specification
 * @throws AlgebricksException declared by this method and the metadata calls it makes
 */
public static JobSpecification buildFilesIndexCreateJobSpec(Dataset dataset, List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo =
            DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
    Map<String, String> mergePolicyProperties = compactionInfo.second;

    // Resolve the files index name once so the split lookup and the metadata
    // lookup below are guaranteed to refer to the same index.
    String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, fileIndexName);
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;

    Index fileIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(),
            dataset.getDataverseName(), dataset.getDatasetName(), fileIndexName);
    ARecordType recordType = (ARecordType) metadataProvider.findType(dataset.getItemTypeDataverseName(),
            dataset.getItemTypeName());
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, fileIndex, recordType, null,
            mergePolicyFactory, mergePolicyProperties);

    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(
            storageComponentProvider.getStorageManager(), secondaryFileSplitProvider, resourceFactory,
            !dataset.isTemp());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexCreateOperatorDescriptor externalFilesOp = new ExternalFilesIndexCreateOperatorDescriptor(
            spec, indexBuilderFactory, dataflowHelperFactory, externalFilesSnapshot);

    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp,
            secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : IStorageComponentProvider(org.apache.asterix.common.context.IStorageComponentProvider) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IndexBuilderFactory(org.apache.hyracks.storage.am.common.build.IndexBuilderFactory) Index(org.apache.asterix.metadata.entities.Index) ExternalFilesIndexCreateOperatorDescriptor(org.apache.asterix.external.operators.ExternalFilesIndexCreateOperatorDescriptor) IIndexBuilderFactory(org.apache.hyracks.storage.am.common.api.IIndexBuilderFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) Map(java.util.Map) ARecordType(org.apache.asterix.om.types.ARecordType) IResourceFactory(org.apache.hyracks.storage.common.IResourceFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) ILSMMergePolicyFactory(org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory)

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)32 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)26 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)21 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 Index (org.apache.asterix.metadata.entities.Index)13 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)11 MetadataException (org.apache.asterix.metadata.MetadataException)10 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)10 Pair (org.apache.hyracks.algebricks.common.utils.Pair)10 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)9 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)8 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)8 TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)8 DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)7 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)7 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)7 JobId (org.apache.asterix.common.transactions.JobId)6 Dataset (org.apache.asterix.metadata.entities.Dataset)6 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)6