
Example 1 with TreeIndexBulkLoadOperatorDescriptor

Use of org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor in project asterixdb by apache.

From the class SecondaryBTreeOperationsHelper, the method buildLoadingJobSpec:

@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(index.getKeyFieldNames().size());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        /*
         * In case of external data, this method is used to build loading jobs
         * for both the initial load on index creation and the transaction load
         * on dataset refresh.
         */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        // Assign op.
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        IOperatorDescriptor root;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    } else {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
        // Create secondary BTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
        return spec;
    }
}
Also used:
ExternalScanOperatorDescriptor (org.apache.asterix.external.operators.ExternalScanOperatorDescriptor)
AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)
ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)
AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor)
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
JobId (org.apache.asterix.common.transactions.JobId)
SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
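
Distilled from the helper methods above, the tail of the pipeline reduces to constructing the bulk-load descriptor and wiring it behind the sort. A minimal sketch, assuming spec, secondaryRecDesc, fieldPermutation, numElementsHint, dataflowHelperFactory, and sortOp are prepared as in the example (the constructor arguments mirror the usage shown in Examples 3 and 5 below):

// Bulk load expects its input already sorted on the secondary keys, so it is
// fed by the external sort and terminated by a sink that discards the output.
TreeIndexBulkLoadOperatorDescriptor bulkLoadOp = new TreeIndexBulkLoadOperatorDescriptor(spec, secondaryRecDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, dataflowHelperFactory);
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, bulkLoadOp, 0);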

Example 2 with TreeIndexBulkLoadOperatorDescriptor

Use of org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor in project asterixdb by apache.

From the class SecondaryRTreeOperationsHelper, the method buildLoadingJobSpec:

@Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
    /***************************************************
     * [ About PointMBR Optimization ]
     * Instead of storing an MBR (4 doubles) for a point (2 doubles) in an
     * RTree leaf node, the PointMBR concept is introduced: a point is stored
     * as 2 doubles in the RTree leaf node, which reduces the RTree index size
     * roughly by half. To fully benefit from the PointMBR concept, besides
     * the RTree itself, the external sort operator used during bulk loading
     * (from either data loading or index creation) must also deal with a
     * point as 2 doubles instead of 4. Otherwise, the external sort handles
     * twice as many doubles as it actually requires. For this purpose,
     * PointMBR-specific optimization logic is added as follows:
     * 1) The CreateMBR function in the assign operator generates 2 doubles
     * instead of 4.
     * 2) The external sort operator sorts points represented with 2 doubles.
     * 3) Bulk loading in the RTree takes 4 doubles by reading the 2 doubles
     * twice and then does the same work as in the non-point MBR case.
     ***************************************************/
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    int[] fieldPermutation = createFieldPermutationForBulkLoadOp(numNestedSecondaryKeyFields);
    int numNestedSecondaryKeyFieldsConsideringPointMBR = isPointMBR ? numNestedSecondaryKeyFields / 2 : numNestedSecondaryKeyFields;
    RecordDescriptor secondaryRecDescConsideringPointMBR = isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc;
    boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        // Create dummy key provider for feeding the primary index scan.
        IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
        JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
        // Create primary index scan op.
        IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
        // Assign op.
        IOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, secondaryRecDescConsideringPointMBR);
        // Create secondary RTree bulk load op.
        TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
        // Connect the operators.
        spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        spec.addRoot(metaOp);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    } else {
        // External dataset
        /*
         * In case of external data, this method is used to build loading jobs
         * for both the initial load on index creation and the transaction load
         * on dataset refresh.
         */
        // Create external indexing scan operator
        ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
        AbstractOperatorDescriptor sourceOp = primaryScanOp;
        if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
            sourceOp = createCastOp(spec, dataset.getDatasetType());
            spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
        }
        // Assign op.
        AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        // If any of the secondary fields are nullable, then add a select op that filters nulls.
        AlgebricksMetaOperatorDescriptor selectOp = null;
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
        }
        // Sort by secondary keys.
        ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, secondaryRecDescConsideringPointMBR);
        // Create secondary RTree bulk load op.
        IOperatorDescriptor root;
        AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
        if (externalFiles != null) {
            // Transaction load
            secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        } else {
            // Initial load
            secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
        }
        AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
        spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
        root = metaOp;
        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
        if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
            spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
        } else {
            spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
        }
        spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
        spec.addRoot(root);
        spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    }
    return spec;
}
Also used:
ExternalScanOperatorDescriptor (org.apache.asterix.external.operators.ExternalScanOperatorDescriptor)
AbstractSingleActivityOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor)
ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
AlgebricksMetaOperatorDescriptor (org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
AbstractOperatorDescriptor (org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor)
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
JobId (org.apache.asterix.common.transactions.JobId)
SinkRuntimeFactory (org.apache.hyracks.algebricks.runtime.operators.base.SinkRuntimeFactory)
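
To make the PointMBR comment above concrete, here is the storage arithmetic as a sketch (an illustration, not code from the project):

// For a point p = (x, y), a full MBR stores 4 doubles: (minX, minY, maxX, maxY).
// For a point, minX == maxX == x and minY == maxY == y, so PointMBR stores only
// (x, y): 2 doubles (16 bytes) instead of 4 (32 bytes) per leaf entry, which is
// where the "roughly half" index-size reduction comes from. The bulk loader
// rebuilds the 4-double form by reading each of the 2 stored doubles twice.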

Example 3 with TreeIndexBulkLoadOperatorDescriptor

Use of org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor in project asterixdb by apache.

From the class SecondaryIndexOperationsHelper, the method createTreeIndexBulkLoadOp:

protected TreeIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec, int[] fieldPermutation, IIndexDataflowHelperFactory dataflowHelperFactory, float fillFactor) throws AlgebricksException {
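    // Argument sketch (flag meanings are an assumption from how these examples
    // use the Hyracks constructor, not project documentation): secondaryRecDesc
    // is the layout of the sorted input; fieldPermutation maps input fields to
    // index fields; fillFactor is the target page fullness; numElementsHint is
    // an expected tuple count used to size the bulk loader; the two false flags
    // skip input verification and the empty-index check.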
    TreeIndexBulkLoadOperatorDescriptor treeIndexBulkLoadOp = new TreeIndexBulkLoadOperatorDescriptor(spec, secondaryRecDesc, fieldPermutation, fillFactor, false, numElementsHint, false, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, treeIndexBulkLoadOp, secondaryPartitionConstraint);
    return treeIndexBulkLoadOp;
}
Also used: TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)

Example 4 with TreeIndexBulkLoadOperatorDescriptor

Use of org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor in project asterixdb by apache.

From the class AbstractRTreeOperatorTest, the method loadSecondaryIndex:

protected void loadSecondaryIndex() throws Exception {
    JobSpecification spec = new JobSpecification();
    // build a dummy tuple containing a single placeholder key
    ArrayTupleBuilder tb = new ArrayTupleBuilder(primaryKeyFieldCount * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, keyProviderOp, NC1_ID);
    // null low/high key fields: scan the whole primary index (-infinity to +infinity)
    int[] lowKeyFields = null;
    int[] highKeyFields = null;
    // scan primary index
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, lowKeyFields, highKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primarySearchOp, NC1_ID);
    // load secondary index
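    // Assumed test schema: fields 6-9 of the primary record carry the four
    // R-tree key doubles, and field 0 is the (string) primary key appended last.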
    int[] fieldPermutation = { 6, 7, 8, 9, 0 };
    TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, secondaryRecDesc, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBulkLoad, NC1_ID);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nsOpDesc, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, secondaryBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    runTest(spec);
}
Also used:
NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor)
DataOutput (java.io.DataOutput)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor)
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)
ConstantTupleSourceOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)

Example 5 with TreeIndexBulkLoadOperatorDescriptor

Use of org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor in project asterixdb by apache.

From the class MetadataProvider, the method getBTreeRuntime:

private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getBTreeRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    boolean temp = dataset.getDatasetDetails().isTemp();
    isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
    int numKeys = primaryKeys.size() + secondaryKeys.size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    // generate field permutations
    int[] fieldPermutation = new int[numKeys + numFilterFields];
    int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
    int i = 0;
    int j = 0;
    for (LogicalVariable varKey : secondaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        i++;
    }
    for (LogicalVariable varKey : primaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        modificationCallbackPrimaryKeyFields[j] = i;
        i++;
        j++;
    }
    if (numFilterFields > 0) {
        int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
        fieldPermutation[numKeys] = idx;
    }
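    // Worked example (hypothetical variables): with secondaryKeys = [sk],
    // primaryKeys = [pk0, pk1] and one filter field f, the loops above yield
    // fieldPermutation = [idx(sk), idx(pk0), idx(pk1), idx(f)] and
    // modificationCallbackPrimaryKeyFields = [1, 2], the positions of the
    // primary keys within fieldPermutation.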
    int[] prevFieldPermutation = null;
    if (indexOp == IndexOperation.UPSERT) {
        // generate field permutations for prev record
        prevFieldPermutation = new int[numKeys + numFilterFields];
        int k = 0;
        for (LogicalVariable varKey : prevSecondaryKeys) {
            int idx = propagatedSchema.findVariable(varKey);
            prevFieldPermutation[k] = idx;
            k++;
        }
        for (LogicalVariable varKey : primaryKeys) {
            int idx = propagatedSchema.findVariable(varKey);
            prevFieldPermutation[k] = idx;
            k++;
        }
        // Filter can only be one field!
        if (numFilterFields > 0) {
            int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
            prevFieldPermutation[numKeys] = idx;
        }
    }
    try {
        // Index parameters.
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
        IOperatorDescriptor op;
        if (bulkload) {
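        // The per-partition cardinality hint is an expected element count used
        // to size the bulk loader (a sizing hint, not a hard limit).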
            long numElementsHint = getCardinalityPerPartitionHint(dataset);
            op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, idfh);
        } else if (indexOp == IndexOperation.UPSERT) {
            op = new LSMSecondaryUpsertOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, idfh, filterFactory, modificationCallbackFactory, prevFieldPermutation);
        } else {
            op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh, filterFactory, false, modificationCallbackFactory);
        }
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
Also used:
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)
Dataset (org.apache.asterix.metadata.entities.Dataset)
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)
Index (org.apache.asterix.metadata.entities.Index)
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)
JobEventListenerFactory (org.apache.asterix.runtime.job.listener.JobEventListenerFactory)
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)
DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)
MetadataException (org.apache.asterix.metadata.MetadataException)
CompilationException (org.apache.asterix.common.exceptions.CompilationException)
IOException (java.io.IOException)
AsterixException (org.apache.asterix.common.exceptions.AsterixException)
LSMSecondaryUpsertOperatorDescriptor (org.apache.asterix.runtime.operators.LSMSecondaryUpsertOperatorDescriptor)
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
IModificationOperationCallbackFactory (org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory)
LSMTreeInsertDeleteOperatorDescriptor (org.apache.asterix.common.dataflow.LSMTreeInsertDeleteOperatorDescriptor)
TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)
JobId (org.apache.asterix.common.transactions.JobId)
Pair (org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor): 14 uses
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 9 uses
IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory): 9 uses
IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory): 9 uses
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 8 uses
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 8 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 7 uses
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor): 7 uses
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 6 uses
NullSinkOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor): 6 uses
JobId (org.apache.asterix.common.transactions.JobId): 5 uses
MetadataException (org.apache.asterix.metadata.MetadataException): 5 uses
DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint): 5 uses
Dataset (org.apache.asterix.metadata.entities.Dataset): 5 uses
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint): 5 uses
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 5 uses
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 5 uses
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 5 uses
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable): 5 uses
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 5 uses