use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
the class SecondaryBTreeOperationsHelper method buildLoadingJobSpec.
@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
int[] fieldPermutation = createFieldPermutationForBulkLoadOp(index.getKeyFieldNames().size());
IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
/*
* In case of external data,
* this method is used to build loading jobs for both initial load on index creation
* and transaction load on dataset refresh
*/
// Create external indexing scan operator
ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
// Assign op.
AbstractOperatorDescriptor sourceOp = primaryScanOp;
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
// Create secondary BTree bulk load op.
AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
IOperatorDescriptor root;
if (externalFiles != null) {
// Transaction load
secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
} else {
// Initial load
secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
}
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
root = metaOp;
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.addRoot(root);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
} else {
// Create dummy key provider for feeding the primary index scan.
IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
// Create primary index scan op.
IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
// Assign op.
IOperatorDescriptor sourceOp = primaryScanOp;
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
// Create secondary BTree bulk load op.
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
// Connect the operators.
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
spec.addRoot(metaOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
}
}
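A hedged sketch (not part of the original snippet) of how a spec built by buildLoadingJobSpec might be submitted to the Hyracks cluster controller. The host, port, and the secondaryHelper variable are illustrative placeholders, the Hyracks JobId is fully qualified to avoid the clash with the transaction JobId used in these snippets, and the calls assume an enclosing method that throws Exception.
IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098); // placeholder CC host/port
JobSpecification loadSpec = secondaryHelper.buildLoadingJobSpec(); // helper constructed as above (assumed)
org.apache.hyracks.api.job.JobId hyracksJobId = hcc.startJob(loadSpec);
hcc.waitForCompletion(hyracksJobId); // block until the bulk load job finishes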
use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
the class SecondaryRTreeOperationsHelper method buildLoadingJobSpec.
@Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
/***************************************************
* [ About PointMBR Optimization ]
* Instead of storing an MBR (4 doubles) for a point (2 doubles) in an RTree leaf node,
* the PointMBR concept is introduced:
* PointMBR is a way to store a point as 2 doubles in an RTree leaf node,
* which roughly halves the RTree index size.
* To fully benefit from the PointMBR concept, besides the RTree itself,
* the external sort operator used during bulk loading (from either data loading or index creation)
* must deal with a point as 2 doubles instead of 4; otherwise, the external sort shuffles twice as
* many doubles as it actually requires. For this purpose,
* PointMBR-specific optimization logic is added as follows:
* 1) The CreateMBR function in the assign operator generates 2 doubles instead of 4.
* 2) The external sort operator sorts points represented with 2 doubles.
* 3) Bulk loading in the RTree reconstructs the 4 doubles by reading the 2 doubles twice and then
* does the same work as in the non-point MBR case.
***************************************************/
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
int[] fieldPermutation = createFieldPermutationForBulkLoadOp(numNestedSecondaryKeyFields);
int numNestedSecondaryKeyFieldsConsideringPointMBR = isPointMBR ? numNestedSecondaryKeyFields / 2 : numNestedSecondaryKeyFields;
RecordDescriptor secondaryRecDescConsideringPointMBR = isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc;
boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
// Create dummy key provider for feeding the primary index scan.
IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
// Create primary index scan op.
IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
// Assign op.
IOperatorDescriptor sourceOp = primaryScanOp;
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
// Create secondary RTree bulk load op.
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
// Connect the operators.
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
spec.addRoot(metaOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
} else {
// External dataset
/*
* In case of external data, this method is used to build loading jobs for both
* initial load on index creation
* and transaction load on dataset refresh
*/
// Create external indexing scan operator
ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
AbstractOperatorDescriptor sourceOp = primaryScanOp;
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
// Assign op.
AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeyFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
// Create secondary RTree bulk load op.
IOperatorDescriptor root;
AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
if (externalFiles != null) {
// Transaction load
secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
} else {
// Initial load
secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
}
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
root = metaOp;
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.addRoot(root);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
}
return spec;
}
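The PointMBR note above can be made concrete with a small illustrative helper (not from the source): a point persisted as 2 doubles is widened back into the 4-double MBR the RTree bulk load consumes, by reading each coordinate twice.
// Illustrative only: for a point, the MBR's min and max corners coincide.
static double[] expandPointToMBR(double x, double y) {
return new double[] { x, y, x, y };
}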
use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
the class SecondaryTreeIndexOperationsHelper method buildCompactJobSpec.
@Override
public JobSpecification buildCompactJobSpec() throws AlgebricksException {
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, dataflowHelperFactory);
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, secondaryPartitionConstraint);
spec.addRoot(compactOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
}
use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
the class AbstractRTreeOperatorTest method setup.
@Before
public void setup() throws Exception {
testHelper = createTestHelper();
primaryFileName = testHelper.getPrimaryIndexName();
primarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, primaryFileName) });
primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
secondaryFileName = testHelper.getSecondaryIndexName();
secondarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, secondaryFileName) });
secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
// field, type and key declarations for primary index
primaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[5] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[6] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[7] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[8] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[9] = UTF8StringPointable.TYPE_TRAITS;
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
// field, type and key declarations for secondary indexes
secondaryTypeTraits[0] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[1] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[2] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[3] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
// These are only used for the LSM RTree variants
int[] rtreeFields = null;
int[] filterFields = null;
ITypeTraits[] filterTypes = null;
IBinaryComparatorFactory[] filterCmpFactories = null;
if (rTreeType == RTreeType.LSMRTREE || rTreeType == RTreeType.LSMRTREE_WITH_ANTIMATTER) {
rtreeFields = new int[] { 0, 1, 2, 3, 4 };
filterFields = new int[] { 4 };
filterTypes = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
filterCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
}
int[] btreeFields = null;
if (rTreeType == RTreeType.LSMRTREE) {
btreeKeyFieldCount = 1;
btreeComparatorFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
btreeFields = new int[] { 4 };
} else {
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
}
IPrimitiveValueProviderFactory[] secondaryValueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(secondaryComparatorFactories.length, DoublePointable.FACTORY);
rtreeFactory = createSecondaryResourceFactory(secondaryValueProviderFactories, RTreePolicyType.RSTARTREE, btreeComparatorFactories, LSMRTreeUtils.proposeBestLinearizer(secondaryTypeTraits, secondaryComparatorFactories.length), btreeFields);
}
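For context, a hedged sketch of what the split providers built in this setup resolve to: ConstantFileSplitProvider simply hands back the fixed split array it was constructed with, and partition i of an index operator opens the file named by split i. The path below is a placeholder.
IFileSplitProvider provider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, "placeholder/primary_index") });
FileSplit split = provider.getFileSplits()[0]; // the file that partition 0 will open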
use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
the class TestNodeController method getInsertPipeline.
public Pair<LSMInsertDeleteOperatorNodePushable, CommitRuntime> getInsertPipeline(IHyracksTaskContext ctx, Dataset dataset, IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields, int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators, StorageComponentProvider storageComponentProvider) throws AlgebricksException, HyracksDataException {
PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
IndexOperation op = IndexOperation.INSERT;
IModificationOperationCallbackFactory modOpCallbackFactory = new PrimaryIndexModificationOperationCallbackFactory(getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, TXN_SUBSYSTEM_PROVIDER, Operation.get(op), ResourceType.LSM_BTREE);
IRecordDescriptorProvider recordDescProvider = primaryIndexInfo.getInsertRecordDescriptorProvider();
IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), primaryIndexInfo.fileSplitProvider);
LSMInsertDeleteOperatorNodePushable insertOp = new LSMInsertDeleteOperatorNodePushable(ctx, PARTITION, primaryIndexInfo.primaryIndexInsertFieldsPermutations, recordDescProvider.getInputRecordDescriptor(new ActivityId(new OperatorDescriptorId(0), 0), 0), op, true, indexHelperFactory, modOpCallbackFactory, null);
CommitRuntime commitOp = new CommitRuntime(ctx, getTxnJobId(), dataset.getDatasetId(), primaryIndexInfo.primaryKeyIndexes, false, true, PARTITION, true);
insertOp.setOutputFrameWriter(0, commitOp, primaryIndexInfo.rDesc);
commitOp.setInputRecordDescriptor(0, primaryIndexInfo.rDesc);
return Pair.of(insertOp, commitOp);
}
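A hedged usage sketch for the returned pair, driving it with the standard Hyracks IFrameWriter protocol (open / nextFrame / close). The nc, ctx, and tupleFrame names, and the null meta type and filter fields, are assumptions borrowed from the surrounding test harness, not part of the original method.
Pair<LSMInsertDeleteOperatorNodePushable, CommitRuntime> pipeline = nc.getInsertPipeline(ctx, dataset, primaryKeyTypes, recordType, null, mergePolicyFactory, mergePolicyProperties, null, primaryKeyIndexes, primaryKeyIndicators, storageComponentProvider);
LSMInsertDeleteOperatorNodePushable insertOp = pipeline.getLeft();
insertOp.open(); // in the usual pushable pattern this also opens the downstream CommitRuntime
insertOp.nextFrame(tupleFrame.getBuffer()); // push frames of records to insert
insertOp.close(); // flush remaining tuples; the commit runtime commits as tuples pass through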