use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.
the class ExternalIndexingOperations method buildAbortOp.
public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
AlgebricksPartitionConstraint constraints = null;
for (Index index : indexes) {
IFileSplitProvider indexSplitProvider;
if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints = metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
indexSplitProvider = sAndConstraints.first;
constraints = sAndConstraints.second;
} else {
indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
}
IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
treeDataflowHelperFactories.add(indexDataflowHelperFactory);
}
ExternalDatasetIndexesAbortOperatorDescriptor op = new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
spec.addRoot(op);
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
}
use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.
the class SecondaryIndexBulkLoadExample method createJob.
private static JobSpecification createJob(Options options) {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// schema of tuples that we are retrieving from the primary index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { // we will use this as payload in secondary index
IntegerSerializerDeserializer.INSTANCE, // we will use this ask key in secondary index
new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparators for sort fields and BTree fields
IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// use a disk-order scan to read primary index
IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec, recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);
// sort the tuples as preparation for bulk load into secondary index
// fields to sort on
int[] sortFields = { 1, 0 };
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
// tuples to be put into B-Tree shall have 2 fields
int secondaryFieldCount = 2;
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
// the B-Tree expects its keyfields to be at the front of its input
// tuple
int[] fieldPermutation = { 1, 0 };
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
// connect the ops
spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
spec.addRoot(nsOpDesc);
return spec;
}
use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.
the class PrimaryIndexSearchExample method createJob.
private static JobSpecification createJob(Options options) throws HyracksDataException {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
int fieldCount = 4;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparators for btree
IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// create roviders for B-Tree
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// schema of tuples coming out of primary index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
// build tuple containing low and high search keys
// high
ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2);
// key
// and
// low
// key
DataOutput dos = tb.getDataOutput();
tb.reset();
// low key
IntegerSerializerDeserializer.INSTANCE.serialize(100, dos);
tb.addFieldEndOffset();
// build
IntegerSerializerDeserializer.INSTANCE.serialize(200, dos);
// high key
tb.addFieldEndOffset();
ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
// low key is in field 0 of tuples going
int[] lowKeyFields = { 0 };
// into search op
// low key is in field 1 of tuples going
int[] highKeyFields = { 1 };
// into search op
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
// have each node print the results of its respective B-Tree
PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, printer, splitNCs);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, btreeSearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), btreeSearchOp, 0, printer, 0);
spec.addRoot(printer);
return spec;
}
use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.
the class RTreeResourceFactoryProvider method getResourceFactory.
@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
if (index.getKeyFieldNames().size() != 1) {
throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_INDEX_NUM_OF_FIELD, index.getKeyFieldNames().size(), index.getIndexType(), 1);
}
IAType spatialType = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(0), index.getKeyFieldNames().get(0), recordType).first;
if (spatialType == null) {
throw new CompilationException(ErrorCode.COMPILATION_FIELD_NOT_FOUND, StringUtils.join(index.getKeyFieldNames().get(0), '.'));
}
List<List<String>> primaryKeyFields = dataset.getPrimaryKeys();
int numPrimaryKeys = primaryKeyFields.size();
ITypeTraits[] primaryTypeTraits = null;
IBinaryComparatorFactory[] primaryComparatorFactories = null;
IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
primaryTypeTraits = new ITypeTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
primaryComparatorFactories = new IBinaryComparatorFactory[numPrimaryKeys];
List<Integer> indicators = null;
if (dataset.hasMetaPart()) {
indicators = ((InternalDatasetDetails) dataset.getDatasetDetails()).getKeySourceIndicator();
}
for (int i = 0; i < numPrimaryKeys; i++) {
IAType keyType = (indicators == null || indicators.get(i) == 0) ? recordType.getSubFieldType(primaryKeyFields.get(i)) : metaType.getSubFieldType(primaryKeyFields.get(i));
primaryComparatorFactories[i] = storageComponentProvider.getComparatorFactoryProvider().getBinaryComparatorFactory(keyType, true);
primaryTypeTraits[i] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(keyType);
}
primaryTypeTraits[numPrimaryKeys] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(recordType);
if (dataset.hasMetaPart()) {
primaryTypeTraits[numPrimaryKeys + 1] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(recordType);
}
}
boolean isPointMBR = spatialType.getTypeTag() == ATypeTag.POINT || spatialType.getTypeTag() == ATypeTag.POINT3D;
int numDimensions = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
int numNestedSecondaryKeyFields = numDimensions * 2;
IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[numNestedSecondaryKeyFields];
IPrimitiveValueProviderFactory[] valueProviderFactories = new IPrimitiveValueProviderFactory[numNestedSecondaryKeyFields];
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[numNestedSecondaryKeyFields + numPrimaryKeys];
IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(spatialType.getTypeTag());
ATypeTag keyType = nestedKeyType.getTypeTag();
for (int i = 0; i < numNestedSecondaryKeyFields; i++) {
secondaryComparatorFactories[i] = storageComponentProvider.getComparatorFactoryProvider().getBinaryComparatorFactory(nestedKeyType, true);
secondaryTypeTraits[i] = storageComponentProvider.getTypeTraitProvider().getTypeTrait(nestedKeyType);
valueProviderFactories[i] = storageComponentProvider.getPrimitiveValueProviderFactory();
}
for (int i = 0; i < numPrimaryKeys; i++) {
secondaryTypeTraits[numNestedSecondaryKeyFields + i] = (dataset.getDatasetType() == DatasetType.INTERNAL) ? primaryTypeTraits[i] : IndexingConstants.getTypeTraits(i);
}
int[] rtreeFields = null;
if (filterTypeTraits != null && filterTypeTraits.length > 0) {
rtreeFields = new int[numNestedSecondaryKeyFields + numPrimaryKeys];
for (int i = 0; i < rtreeFields.length; i++) {
rtreeFields[i] = i;
}
}
IStorageManager storageManager = storageComponentProvider.getStorageManager();
ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
IMetadataPageManagerFactory metadataPageManagerFactory = storageComponentProvider.getMetadataPageManagerFactory();
ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponentProvider.getIoOperationSchedulerProvider();
boolean durable = !dataset.isTemp();
ILinearizeComparatorFactory linearizeCmpFactory = MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length);
ITypeTraits[] typeTraits = getTypeTraits(mdProvider, dataset, index, recordType, metaType);
IBinaryComparatorFactory[] rtreeCmpFactories = getCmpFactories(mdProvider, index, recordType, metaType);
int[] secondaryFilterFields = (filterTypeTraits != null && filterTypeTraits.length > 0) ? new int[] { numNestedSecondaryKeyFields + numPrimaryKeys } : null;
IBinaryComparatorFactory[] btreeCompFactories = dataset.getDatasetType() == DatasetType.EXTERNAL ? IndexingConstants.getBuddyBtreeComparatorFactories() : getComparatorFactoriesForDeletedKeyBTree(secondaryTypeTraits, primaryComparatorFactories, secondaryComparatorFactories);
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
return new LSMRTreeWithAntiMatterLocalResourceFactory(storageManager, typeTraits, rtreeCmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, valueProviderFactories, rTreePolicyType, linearizeCmpFactory, rtreeFields, isPointMBR, btreeCompFactories);
} else {
return new ExternalRTreeLocalResourceFactory(storageManager, typeTraits, rtreeCmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, btreeCompFactories, valueProviderFactories, rTreePolicyType, linearizeCmpFactory, rtreeFields, new int[] { numNestedSecondaryKeyFields }, isPointMBR, mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate());
}
}
use of org.apache.hyracks.storage.common.IStorageManager in project asterixdb by apache.
the class InvertedIndexResourceFactoryProvider method getResourceFactory.
@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index, ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
// Get basic info
List<List<String>> primaryKeys = dataset.getPrimaryKeys();
List<List<String>> secondaryKeys = index.getKeyFieldNames();
List<String> filterFieldName = DatasetUtil.getFilterField(dataset);
int numPrimaryKeys = primaryKeys.size();
int numSecondaryKeys = secondaryKeys.size();
// Validate
if (dataset.getDatasetType() != DatasetType.INTERNAL) {
throw new CompilationException(ErrorCode.COMPILATION_INDEX_TYPE_NOT_SUPPORTED_FOR_DATASET_TYPE, index.getIndexType().name(), dataset.getDatasetType());
}
if (numPrimaryKeys > 1) {
throw new AsterixException("Cannot create inverted index on dataset with composite primary key.");
}
if (numSecondaryKeys > 1) {
throw new AsterixException("Cannot create composite inverted index on multiple fields.");
}
boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
int numTokenKeyPairFields = (!isPartitioned) ? 1 + numPrimaryKeys : 2 + numPrimaryKeys;
int[] invertedIndexFields = null;
int[] secondaryFilterFieldsForNonBulkLoadOps = null;
int[] invertedIndexFieldsForNonBulkLoadOps = null;
int[] secondaryFilterFields = null;
if (filterFieldName != null) {
invertedIndexFields = new int[numTokenKeyPairFields];
for (int i = 0; i < invertedIndexFields.length; i++) {
invertedIndexFields[i] = i;
}
secondaryFilterFieldsForNonBulkLoadOps = new int[filterFieldName.size()];
secondaryFilterFieldsForNonBulkLoadOps[0] = numSecondaryKeys + numPrimaryKeys;
invertedIndexFieldsForNonBulkLoadOps = new int[numSecondaryKeys + numPrimaryKeys];
for (int i = 0; i < invertedIndexFieldsForNonBulkLoadOps.length; i++) {
invertedIndexFieldsForNonBulkLoadOps[i] = i;
}
secondaryFilterFields = new int[filterFieldName.size()];
secondaryFilterFields[0] = numTokenKeyPairFields - numPrimaryKeys + numPrimaryKeys;
}
IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
IStorageManager storageManager = storageComponentProvider.getStorageManager();
ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
IMetadataPageManagerFactory metadataPageManagerFactory = storageComponentProvider.getMetadataPageManagerFactory();
AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponentProvider.getIoOperationSchedulerProvider();
boolean durable = !dataset.isTemp();
double bloomFilterFalsePositiveRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();
ITypeTraits[] typeTraits = getInvListTypeTraits(mdProvider, dataset, recordType, metaType);
IBinaryComparatorFactory[] cmpFactories = getInvListComparatorFactories(mdProvider, dataset, recordType, metaType);
ITypeTraits[] tokenTypeTraits = getTokenTypeTraits(dataset, index, recordType, metaType);
IBinaryComparatorFactory[] tokenCmpFactories = getTokenComparatorFactories(dataset, index, recordType, metaType);
IBinaryTokenizerFactory tokenizerFactory = getTokenizerFactory(dataset, index, recordType, metaType);
return new LSMInvertedIndexLocalResourceFactory(storageManager, typeTraits, cmpFactories, filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, durable, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, isPartitioned, invertedIndexFields, secondaryFilterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, bloomFilterFalsePositiveRate);
}
Aggregations