Search in sources :

Example 1 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class SecondaryBTreeOperationsHelper method buildLoadingJobSpec.

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(index.getKeyFieldNames().size());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        /*
             * In case of external data,
             * this method is used to build loading jobs for both initial load on index creation
             * and transaction load on dataset referesh
             */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        // Assign op.
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        IOperatorDescriptor root;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    } else {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    }
}
Also used : ExternalScanOperatorDescriptor(org.apache.asterix.external.operators.ExternalScanOperatorDescriptor) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) AbstractOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) JobId(org.apache.asterix.common.transactions.JobId) SinkRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)

Example 2 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class SecondaryRTreeOperationsHelper method buildLoadingJobSpec.

@Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
    /***************************************************
         * [ About PointMBR Optimization ]
         * Instead of storing a MBR(4 doubles) for a point(2 doubles) in RTree leaf node,
         * PointMBR concept is introduced.
         * PointMBR is a way to store a point as 2 doubles in RTree leaf node.
         * This reduces RTree index size roughly in half.
         * In order to fully benefit from the PointMBR concept, besides RTree,
         * external sort operator during bulk-loading (from either data loading or index creation)
         * must deal with point as 2 doubles instead of 4 doubles. Otherwise, external sort will suffer from twice as
         * many doubles as it actually requires. For this purpose,
         * PointMBR specific optimization logic is added as follows:
         * 1) CreateMBR function in assign operator generates 2 doubles, instead of 4 doubles.
         * 2) External sort operator sorts points represented with 2 doubles.
         * 3) Bulk-loading in RTree takes 4 doubles by reading 2 doubles twice and then,
         * do the same work as non-point MBR cases.
         ***************************************************/
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(numNestedSecondaryKeyFields);
    int numNestedSecondaryKeFieldsConsideringPointMBR = isPointMBR ? numNestedSecondaryKeyFields / 2 : numNestedSecondaryKeyFields;
    RecordDescriptor secondaryRecDescConsideringPointMBR = isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
        // Create secondary RTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    } else {
        // External dataset
        /*
             * In case of external data, this method is used to build loading jobs for both
             * initial load on index creation
             * and transaction load on dataset referesh
             */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        // Assign op.
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
        // Create secondary RTree bulk load op.
        IOperatorDescriptor root;
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    }
    return spec;
}
Also used : ExternalScanOperatorDescriptor(org.apache.asterix.external.operators.ExternalScanOperatorDescriptor) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) AlgebricksMetaOperatorDescriptor(org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) AbstractOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ExternalSortOperatorDescriptor(org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) JobId(org.apache.asterix.common.transactions.JobId) SinkRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)

Example 3 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class SecondaryTreeIndexOperationsHelper method buildCompactJobSpec.

@Override
public JobSpecification buildCompactJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, secondaryPartitionConstraint);
    spec.addRoot(compactOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
Also used : LSMTreeIndexCompactOperatorDescriptor(org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor) ConnectorPolicyAssignmentPolicy(org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Example 4 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class AbstractRTreeOperatorTest method setup.

@Before
public void setup() throws Exception {
    testHelper = createTestHelper();
    primaryFileName = testHelper.getPrimaryIndexName();
    primarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, primaryFileName) });
    primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    secondaryFileName = testHelper.getSecondaryIndexName();
    secondarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, secondaryFileName) });
    secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    // field, type and key declarations for primary index
    primaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[5] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[6] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[7] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[8] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[9] = UTF8StringPointable.TYPE_TRAITS;
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // field, type and key declarations for secondary indexes
    secondaryTypeTraits[0] = DoublePointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = DoublePointable.TYPE_TRAITS;
    secondaryTypeTraits[2] = DoublePointable.TYPE_TRAITS;
    secondaryTypeTraits[3] = DoublePointable.TYPE_TRAITS;
    secondaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
    secondaryComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
    secondaryComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
    // This only used for LSMRTree
    int[] rtreeFields = null;
    int[] filterFields = null;
    ITypeTraits[] filterTypes = null;
    IBinaryComparatorFactory[] filterCmpFactories = null;
    if (rTreeType == RTreeType.LSMRTREE || rTreeType == RTreeType.LSMRTREE_WITH_ANTIMATTER) {
        rtreeFields = new int[] { 0, 1, 2, 3, 4 };
        filterFields = new int[] { 4 };
        filterTypes = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
        filterCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
    }
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        btreeKeyFieldCount = 1;
        btreeComparatorFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
        btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
        btreeFields = new int[] { 4 };
    } else {
        btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
        btreeComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
        btreeComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
        btreeComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
        btreeComparatorFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    }
    IPrimitiveValueProviderFactory[] secondaryValueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(secondaryComparatorFactories.length, DoublePointable.FACTORY);
    rtreeFactory = createSecondaryResourceFactory(secondaryValueProviderFactories, RTreePolicyType.RSTARTREE, btreeComparatorFactories, LSMRTreeUtils.proposeBestLinearizer(secondaryTypeTraits, secondaryComparatorFactories.length), btreeFields);
}
Also used : ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IPrimitiveValueProviderFactory(org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) FileSplit(org.apache.hyracks.api.io.FileSplit) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) Before(org.junit.Before)

Example 5 with IndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.

the class TestNodeController method getInsertPipeline.

public Pair<LSMInsertDeleteOperatorNodePushable, CommitRuntime> getInsertPipeline(IHyracksTaskContext ctx, Dataset dataset, IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields, int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators, StorageComponentProvider storageComponentProvider) throws AlgebricksException, HyracksDataException {
    PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
    IndexOperation op = IndexOperation.INSERT;
    IModificationOperationCallbackFactory modOpCallbackFactory = new PrimaryIndexModificationOperationCallbackFactory(getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, TXN_SUBSYSTEM_PROVIDER, Operation.get(op), ResourceType.LSM_BTREE);
    IRecordDescriptorProvider recordDescProvider = primaryIndexInfo.getInsertRecordDescriptorProvider();
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), primaryIndexInfo.fileSplitProvider);
    LSMInsertDeleteOperatorNodePushable insertOp = new LSMInsertDeleteOperatorNodePushable(ctx, PARTITION, primaryIndexInfo.primaryIndexInsertFieldsPermutations, recordDescProvider.getInputRecordDescriptor(new ActivityId(new OperatorDescriptorId(0), 0), 0), op, true, indexHelperFactory, modOpCallbackFactory, null);
    CommitRuntime commitOp = new CommitRuntime(ctx, getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, false, true, PARTITION, true);
    insertOp.setOutputFrameWriter(0, commitOp, primaryIndexInfo.rDesc);
    commitOp.setInputRecordDescriptor(0, primaryIndexInfo.rDesc);
    return Pair.of(insertOp, commitOp);
}
Also used : IndexOperation(org.apache.hyracks.storage.am.common.ophelpers.IndexOperation) PrimaryIndexModificationOperationCallbackFactory(org.apache.asterix.transaction.management.opcallbacks.PrimaryIndexModificationOperationCallbackFactory) LSMInsertDeleteOperatorNodePushable(org.apache.asterix.common.dataflow.LSMInsertDeleteOperatorNodePushable) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) ActivityId(org.apache.hyracks.api.dataflow.ActivityId) IRecordDescriptorProvider(org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider) IModificationOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory) CommitRuntime(org.apache.asterix.transaction.management.runtime.CommitRuntime) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)34 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)34 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)22 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 Index (org.apache.asterix.metadata.entities.Index)13 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)11 MetadataException (org.apache.asterix.metadata.MetadataException)10 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)10 Pair (org.apache.hyracks.algebricks.common.utils.Pair)10 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)9 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)8 TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)8 DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)7 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)7 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)7 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)7 JobId (org.apache.asterix.common.transactions.JobId)6 Dataset (org.apache.asterix.metadata.entities.Dataset)6 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)6