Example 11 with IIndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

From class PrimaryIndexBulkLoadExample, method createJob:

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int, int, string
    // we will use field index 2 as the primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() }); // we will use this as payload
    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], and strings with max length of 10 characters, and
    // random seed 50
    DataGenOperatorDescriptor dataGen = new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 50);
    // run the data generator on the first node controller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    // sort the tuples as preparation for bulk load
    // fields to sort on
    int[] sortFields = { 2 };
    // comparators for sort fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into B-Tree shall have 4 fields
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // create providers for B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // the B-Tree expects its key fields to be at the front of its input tuple:
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] fieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, recDesc, fieldPermutation, 0.7f, false, 1000L, true, dataflowHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    // distribute the records from the datagen via hashing to the bulk load ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    spec.connect(hashConn, dataGen, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
Also used: RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory), DataGenOperatorDescriptor (org.apache.hyracks.examples.btree.helper.DataGenOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification), TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor), ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits), IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory), MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), IStorageManager (org.apache.hyracks.storage.common.IStorageManager), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
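
Such a job specification is not executed directly; it is submitted to a Hyracks cluster controller. Below is a minimal sketch of the surrounding driver, assuming the example's Options class exposes host and port fields for the cluster controller (those field names, like the args4j parsing shown, are illustrative rather than confirmed from this source):

public static void main(String[] args) throws Exception {
    Options options = new Options();
    new CmdLineParser(options).parseArgument(args);
    // connect to the Hyracks cluster controller
    IHyracksClientConnection hcc = new HyracksConnection(options.host, options.port);
    JobSpecification job = createJob(options);
    // start the bulk-load job and block until it finishes
    JobId jobId = hcc.startJob(job);
    hcc.waitForCompletion(jobId);
}

The same submit-and-wait pattern applies to the job specifications built in Examples 14 and 15 below.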

Example 12 with IIndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

From class TestNodeController, method getInsertPipeline:

public Pair<LSMInsertDeleteOperatorNodePushable, CommitRuntime> getInsertPipeline(IHyracksTaskContext ctx, Dataset dataset, IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields, int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators, StorageComponentProvider storageComponentProvider) throws AlgebricksException, HyracksDataException {
    PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
    IndexOperation op = IndexOperation.INSERT;
    IModificationOperationCallbackFactory modOpCallbackFactory = new PrimaryIndexModificationOperationCallbackFactory(getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, TXN_SUBSYSTEM_PROVIDER, Operation.get(op), ResourceType.LSM_BTREE);
    IRecordDescriptorProvider recordDescProvider = primaryIndexInfo.getInsertRecordDescriptorProvider();
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), primaryIndexInfo.fileSplitProvider);
    LSMInsertDeleteOperatorNodePushable insertOp = new LSMInsertDeleteOperatorNodePushable(ctx, PARTITION, primaryIndexInfo.primaryIndexInsertFieldsPermutations, recordDescProvider.getInputRecordDescriptor(new ActivityId(new OperatorDescriptorId(0), 0), 0), op, true, indexHelperFactory, modOpCallbackFactory, null);
    CommitRuntime commitOp = new CommitRuntime(ctx, getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, false, true, PARTITION, true);
    insertOp.setOutputFrameWriter(0, commitOp, primaryIndexInfo.rDesc);
    commitOp.setInputRecordDescriptor(0, primaryIndexInfo.rDesc);
    return Pair.of(insertOp, commitOp);
}
Also used: IndexOperation (org.apache.hyracks.storage.am.common.ophelpers.IndexOperation), PrimaryIndexModificationOperationCallbackFactory (org.apache.asterix.transaction.management.opcallbacks.PrimaryIndexModificationOperationCallbackFactory), LSMInsertDeleteOperatorNodePushable (org.apache.asterix.common.dataflow.LSMInsertDeleteOperatorNodePushable), OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), ActivityId (org.apache.hyracks.api.dataflow.ActivityId), IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider), IModificationOperationCallbackFactory (org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory), CommitRuntime (org.apache.asterix.transaction.management.runtime.CommitRuntime), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
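
The returned operator pair follows the standard IFrameWriter lifecycle: open the insert operator, push frames of serialized tuples into it, and close it, which also drives the downstream CommitRuntime. A minimal driver sketch, assuming a TestNodeController instance nc and an already prepared ByteBuffer of tuples (tupleFrame here is hypothetical):

Pair<LSMInsertDeleteOperatorNodePushable, CommitRuntime> pipeline =
        nc.getInsertPipeline(ctx, dataset, primaryKeyTypes, recordType, metaType,
                mergePolicyFactory, mergePolicyProperties, filterFields,
                primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
LSMInsertDeleteOperatorNodePushable insertOp = pipeline.getLeft();
insertOp.open();                // opens the LSM index and the commit runtime
insertOp.nextFrame(tupleFrame); // push one frame of serialized tuples
insertOp.close();               // flushes remaining tuples and closes the pipeline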

Example 13 with IIndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

From class TestNodeController, method getFullScanPipeline:

public IPushRuntime getFullScanPipeline(IFrameWriter countOp, IHyracksTaskContext ctx, Dataset dataset, IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType, NoMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields, int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators, StorageComponentProvider storageComponentProvider) throws HyracksDataException, AlgebricksException {
    IPushRuntime emptyTupleOp = new EmptyTupleSourceRuntimeFactory().createPushRuntime(ctx);
    JobSpecification spec = new JobSpecification();
    PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), primaryIndexInfo.fileSplitProvider);
    BTreeSearchOperatorDescriptor searchOpDesc = new BTreeSearchOperatorDescriptor(spec, primaryIndexInfo.rDesc, null, null, true, true, indexDataflowHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, filterFields, filterFields, false);
    BTreeSearchOperatorNodePushable searchOp = searchOpDesc.createPushRuntime(ctx, primaryIndexInfo.getSearchRecordDescriptorProvider(), PARTITION, 1);
    emptyTupleOp.setFrameWriter(0, searchOp, primaryIndexInfo.getSearchRecordDescriptorProvider().getInputRecordDescriptor(null, 0));
    searchOp.setOutputFrameWriter(0, countOp, primaryIndexInfo.rDesc);
    return emptyTupleOp;
}
Also used: IPushRuntime (org.apache.hyracks.algebricks.runtime.base.IPushRuntime), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), EmptyTupleSourceRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory), BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor), BTreeSearchOperatorNodePushable (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorNodePushable), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
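
Driving this pipeline takes only the source runtime's lifecycle calls: the empty-tuple source emits a single empty tuple, which triggers the full B-Tree scan in the search operator, and every result tuple flows into the countOp writer supplied by the caller. A sketch of how a test might drive it (nc is assumed to be a TestNodeController; countOp is a caller-provided IFrameWriter):

IPushRuntime fullScanOp = nc.getFullScanPipeline(countOp, ctx, dataset, primaryKeyTypes,
        recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterFields,
        primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
fullScanOp.open();  // pushes the empty tuple downstream, starting the index scan
fullScanOp.close(); // flushes and closes; countOp has now seen every record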

Example 14 with IIndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

From class DatasetUtil, method dropDatasetJobSpec:

public static JobSpecification dropDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider) throws AlgebricksException, HyracksDataException, RemoteException, ACIDException {
    LOGGER.info("DROP DATASET: " + dataset);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    }
    JobSpecification specPrimary = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop, splitsAndConstraint.second);
    specPrimary.addRoot(primaryBtreeDrop);
    return specPrimary;
}
Also used: IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory), IndexDropOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.IndexDropOperatorDescriptor)
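
The returned specification is executed like any other metadata job: submit it and wait for completion. In asterixdb this typically goes through a helper along the lines of JobUtils.runJob (a sketch; the real call site supplies an actual IHyracksClientConnection as hcc):

JobSpecification jobSpec = DatasetUtil.dropDatasetJobSpec(dataset, metadataProvider);
JobUtils.runJob(hcc, jobSpec, true); // true: block until the drop job completes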

Example 15 with IIndexDataflowHelperFactory

Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

From class ExternalIndexingOperations, method buildAbortOp:

public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints = metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesAbortOperatorDescriptor op = new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used: IStorageManager (org.apache.hyracks.storage.common.IStorageManager), ExternalDatasetIndexesAbortOperatorDescriptor (org.apache.asterix.external.operators.ExternalDatasetIndexesAbortOperatorDescriptor), ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy), IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), ArrayList (java.util.ArrayList), AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint), Index (org.apache.asterix.metadata.entities.Index), JobSpecification (org.apache.hyracks.api.job.JobSpecification), IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory): 32 uses
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory): 31 uses
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 26 uses
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 21 uses
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 19 uses
Index (org.apache.asterix.metadata.entities.Index): 13 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 11 uses
MetadataException (org.apache.asterix.metadata.MetadataException): 10 uses
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 10 uses
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 10 uses
IStorageManager (org.apache.hyracks.storage.common.IStorageManager): 9 uses
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex): 8 uses
ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy): 8 uses
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor): 8 uses
DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint): 7 uses
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint): 7 uses
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 7 uses
JobId (org.apache.asterix.common.transactions.JobId): 6 uses
Dataset (org.apache.asterix.metadata.entities.Dataset): 6 uses
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 6 uses