use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class InvertedIndexResourceFactoryProvider method getResourceFactory.
/**
 * Builds the local resource factory for an LSM inverted index on the given dataset.
 * <p>
 * The dataset must be internal, must have at most one primary-key entry, and the index must have at
 * most one secondary-key entry; otherwise an exception is thrown before any storage component is
 * looked up. When the dataset declares a filter field, the four field-position arrays handed to the
 * factory are populated; otherwise all four are passed as {@code null}.
 *
 * @param mdProvider
 *            supplies the storage component provider and the storage properties
 * @param dataset
 *            the dataset the index belongs to
 * @param index
 *            the inverted index to build a resource factory for
 * @param recordType
 *            the record type of the dataset
 * @param metaType
 *            the meta type of the dataset
 * @param mergePolicyFactory
 *            passed through to the created factory
 * @param mergePolicyProperties
 *            passed through to the created factory
 * @param filterTypeTraits
 *            passed through to the created factory
 * @param filterCmpFactories
 *            passed through to the created factory
 * @return a new {@link LSMInvertedIndexLocalResourceFactory}
 * @throws AlgebricksException
 *             declared by the signature; validation failures are raised as
 *             {@code CompilationException} or {@code AsterixException}
 */
@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index,
        ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory,
        Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits,
        IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    // Key layout of the dataset and of the index.
    List<List<String>> primaryKeys = dataset.getPrimaryKeys();
    List<List<String>> secondaryKeys = index.getKeyFieldNames();
    List<String> filterFieldName = DatasetUtil.getFilterField(dataset);
    final int pkCount = primaryKeys.size();
    final int skCount = secondaryKeys.size();

    // Reject configurations that are not supported for inverted indexes.
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        throw new CompilationException(ErrorCode.COMPILATION_INDEX_TYPE_NOT_SUPPORTED_FOR_DATASET_TYPE,
                index.getIndexType().name(), dataset.getDatasetType());
    }
    if (pkCount > 1) {
        throw new AsterixException("Cannot create inverted index on dataset with composite primary key.");
    }
    if (skCount > 1) {
        throw new AsterixException("Cannot create composite inverted index on multiple fields.");
    }

    boolean isPartitioned = index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX
            || index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX;
    // Length-partitioned indexes have one more leading field than single-partition ones.
    final int tokenKeyPairFieldCount = pkCount + (isPartitioned ? 2 : 1);

    // Field-position arrays; they stay null when the dataset has no filter field.
    int[] invertedIndexFields = null;
    int[] secondaryFilterFieldsForNonBulkLoadOps = null;
    int[] invertedIndexFieldsForNonBulkLoadOps = null;
    int[] secondaryFilterFields = null;
    if (filterFieldName != null) {
        final int filterFieldCount = filterFieldName.size();
        final int nonBulkLoadKeyCount = skCount + pkCount;

        // Identity mapping 0..n-1 over all token/key-pair fields.
        invertedIndexFields = new int[tokenKeyPairFieldCount];
        for (int pos = 0; pos < tokenKeyPairFieldCount; pos++) {
            invertedIndexFields[pos] = pos;
        }

        // Position immediately past the secondary and primary key fields.
        secondaryFilterFieldsForNonBulkLoadOps = new int[filterFieldCount];
        secondaryFilterFieldsForNonBulkLoadOps[0] = nonBulkLoadKeyCount;

        // Identity mapping 0..n-1 over the secondary and primary key fields.
        invertedIndexFieldsForNonBulkLoadOps = new int[nonBulkLoadKeyCount];
        for (int pos = 0; pos < nonBulkLoadKeyCount; pos++) {
            invertedIndexFieldsForNonBulkLoadOps[pos] = pos;
        }

        // Position immediately past the token/key-pair fields
        // (the earlier form "count - pkCount + pkCount" reduces to exactly this value).
        secondaryFilterFields = new int[filterFieldCount];
        secondaryFilterFields[0] = tokenKeyPairFieldCount;
    }

    // Storage-level collaborators.
    IStorageComponentProvider storageComponents = mdProvider.getStorageComponentProvider();
    IStorageManager storageManager = storageComponents.getStorageManager();
    ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
    ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IMetadataPageManagerFactory metadataPageManagerFactory = storageComponents.getMetadataPageManagerFactory();
    AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
    ILSMIOOperationSchedulerProvider ioSchedulerProvider = storageComponents.getIoOperationSchedulerProvider();
    boolean durable = !dataset.isTemp();
    double bloomFilterFalsePositiveRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();

    // Type traits, comparators and tokenizer for the inverted lists and the tokens.
    ITypeTraits[] invListTypeTraits = getInvListTypeTraits(mdProvider, dataset, recordType, metaType);
    IBinaryComparatorFactory[] invListCmpFactories =
            getInvListComparatorFactories(mdProvider, dataset, recordType, metaType);
    ITypeTraits[] tokenTypeTraits = getTokenTypeTraits(dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] tokenCmpFactories = getTokenComparatorFactories(dataset, index, recordType, metaType);
    IBinaryTokenizerFactory tokenizerFactory = getTokenizerFactory(dataset, index, recordType, metaType);

    return new LSMInvertedIndexLocalResourceFactory(storageManager, invListTypeTraits, invListCmpFactories,
            filterTypeTraits, filterCmpFactories, secondaryFilterFields, opTrackerFactory, ioOpCallbackFactory,
            metadataPageManagerFactory, vbcProvider, ioSchedulerProvider, mergePolicyFactory,
            mergePolicyProperties, durable, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, isPartitioned,
            invertedIndexFields, secondaryFilterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps,
            bloomFilterFalsePositiveRate);
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class AbstractLSMRTreeExamplesTest method additionalFilteringingExample.
/**
 * Tests the LSM component filters.
 * <p>
 * Creates a tree index over six integer fields with field 5 declared as the filter field, inserts
 * 10000 random rectangles whose filter value is the insertion counter, scans the index, and finally
 * runs a range search bounded by a minimum filter tuple of 400 and a maximum filter tuple of 500.
 */
@Test
public void additionalFilteringingExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Testing LSMRTree or LSMRTreeWithAntiMatterTuples component filters.");
    }

    // Every tuple has six integer fields, so traits and serdes are uniform.
    final int fieldCount = 6;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        typeTraits[f] = IntegerPointable.TYPE_TRAITS;
    }
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        fieldSerdes[f] = IntegerSerializerDeserializer.INSTANCE;
    }

    // The first four fields are the RTree keys.
    final int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    for (int k = 0; k < rtreeKeyFieldCount; k++) {
        rtreeCmpFactories[k] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    }

    // BTree keys; the parameters differ between LSMRTree and LSMRTreeWithAntiMatterTuples.
    IBinaryComparatorFactory[] btreeCmpFactories;
    int[] btreeFields = null;
    if (rTreeType == RTreeType.LSMRTREE) {
        // One BTree key, located right after the RTree key fields.
        btreeCmpFactories = new IBinaryComparatorFactory[1];
        btreeCmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        btreeFields = new int[] { rtreeKeyFieldCount };
    } else {
        // One comparator per tuple field; btreeFields stays null.
        btreeCmpFactories = new IBinaryComparatorFactory[6];
        for (int k = 0; k < btreeCmpFactories.length; k++) {
            btreeCmpFactories[k] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
        }
    }

    // Value providers for the RTree keys.
    IPrimitiveValueProviderFactory[] valueProviderFactories =
            RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);

    // Field 5 is the filter field; fields 0-4 are handed to the index as the RTree fields.
    int[] rtreeFields = { 0, 1, 2, 3, 4 };
    ITypeTraits[] filterTypeTraits = { IntegerPointable.TYPE_TRAITS };
    IBinaryComparatorFactory[] filterCmpFactories =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    int[] filterFields = { 5 };

    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories,
            valueProviderFactories, RTreePolicyType.RTREE, rtreeFields, btreeFields, filterTypeTraits,
            filterCmpFactories, filterFields);
    treeIndex.create();
    treeIndex.activate();

    long start = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Inserting into tree...");
    }
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    final int numInserts = 10000;
    for (int i = 0; i < numInserts; i++) {
        // Two random corner points; normalised below into (minX, minY, maxX, maxY).
        int p1x = rnd.nextInt();
        int p1y = rnd.nextInt();
        int p2x = rnd.nextInt();
        int p2y = rnd.nextInt();
        int loX = Math.min(p1x, p2x);
        int loY = Math.min(p1y, p2y);
        int hiX = Math.max(p1x, p2x);
        int hiY = Math.max(p1y, p2y);
        int pk = 5;
        // The filter value is the insertion counter.
        int filter = i;
        TupleUtils.createIntegerTuple(tb, tuple, loX, loY, hiX, hiY, pk, filter);
        try {
            indexAccessor.insert(tuple);
        } catch (HyracksDataException e) {
            // Duplicate keys are tolerated; anything else fails the test.
            boolean duplicateKey = e.getErrorCode() == ErrorCode.DUPLICATE_KEY;
            if (!duplicateKey) {
                throw e;
            }
        }
    }
    long end = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info(numInserts + " inserts in " + (end - start) + "ms");
    }

    scan(indexAccessor, fieldSerdes);
    diskOrderScan(indexAccessor, fieldSerdes);

    // Search rectangle.
    ArrayTupleBuilder keyTb = new ArrayTupleBuilder(rtreeKeyFieldCount);
    ArrayTupleReference key = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(keyTb, key, -1000, -1000, 1000, 1000);

    // Lower bound on the filter value.
    ArrayTupleBuilder minFilterTb = new ArrayTupleBuilder(filterFields.length);
    ArrayTupleReference minTuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(minFilterTb, minTuple, 400);

    // Upper bound on the filter value.
    ArrayTupleBuilder maxFilterTb = new ArrayTupleBuilder(filterFields.length);
    ArrayTupleReference maxTuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(maxFilterTb, maxTuple, 500);

    rangeSearch(rtreeCmpFactories, indexAccessor, fieldSerdes, key, minTuple, maxTuple);

    treeIndex.deactivate();
    treeIndex.destroy();
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class LSMInvertedIndexTestContext method create.
/**
 * Creates an inverted index of the requested type together with a matching tokenizing tuple
 * iterator, and wraps both in a new test context.
 * <p>
 * The first {@code tokenFieldCount} entries of {@code fieldSerdes} are treated as token fields; the
 * remaining entries are treated as inverted-list element fields. The filter and field-position
 * arguments are only forwarded for the {@code LSM} and {@code PARTITIONED_LSM} index types.
 *
 * @param harness
 *            supplies the IO manager, buffer caches, file references and LSM collaborators
 * @param fieldSerdes
 *            serializer/deserializers for all fields, token fields first
 * @param tokenFieldCount
 *            number of leading entries in {@code fieldSerdes} that are token fields
 * @param tokenizerFactory
 *            factory for the tokenizer used by the index and by the tuple iterator
 * @param invIndexType
 *            which inverted index implementation to create
 * @param invertedIndexFields
 *            forwarded to the LSM index factory methods
 * @param filterTypeTraits
 *            forwarded to the LSM index factory methods
 * @param filterCmpFactories
 *            forwarded to the LSM index factory methods
 * @param filterFields
 *            forwarded to the LSM index factory methods
 * @param filterFieldsForNonBulkLoadOps
 *            forwarded to the LSM index factory methods
 * @param invertedIndexFieldsForNonBulkLoadOps
 *            forwarded to the LSM index factory methods
 * @return the new test context
 * @throws HyracksDataException
 *             if {@code invIndexType} is not one of the handled types, or if declared by a callee
 */
public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness,
        ISerializerDeserializer[] fieldSerdes, int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory,
        InvertedIndexType invIndexType, int[] invertedIndexFields, ITypeTraits[] filterTypeTraits,
        IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields, int[] filterFieldsForNonBulkLoadOps,
        int[] invertedIndexFieldsForNonBulkLoadOps) throws HyracksDataException {
    ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
    IOManager ioManager = harness.getIOManager();
    IBinaryComparatorFactory[] allCmpFactories =
            SerdeUtils.serdesToComparatorFactories(fieldSerdes, fieldSerdes.length);

    // Leading fields: token type traits and comparators.
    ITypeTraits[] tokenTypeTraits = new ITypeTraits[tokenFieldCount];
    IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[tokenFieldCount];
    for (int t = 0; t < tokenTypeTraits.length; t++) {
        tokenTypeTraits[t] = allTypeTraits[t];
        tokenCmpFactories[t] = allCmpFactories[t];
    }

    // Remaining fields: inverted-list element type traits and comparators.
    int invListFieldCount = fieldSerdes.length - tokenFieldCount;
    ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFieldCount];
    IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListFieldCount];
    for (int e = 0; e < invListTypeTraits.length; e++) {
        int src = e + tokenFieldCount;
        invListTypeTraits[e] = allTypeTraits[src];
        invListCmpFactories[e] = allCmpFactories[src];
    }

    // Build the index implementation selected by invIndexType.
    IInvertedIndex invIndex;
    assert harness.getVirtualBufferCaches().size() > 0;
    switch (invIndexType) {
        case INMEMORY:
            invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(
                    harness.getVirtualBufferCaches().get(0),
                    new VirtualFreePageManager(harness.getVirtualBufferCaches().get(0)), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                    ioManager.resolveAbsolutePath(harness.getOnDiskDir()));
            break;
        case PARTITIONED_INMEMORY:
            invIndex = InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(
                    harness.getVirtualBufferCaches().get(0),
                    new VirtualFreePageManager(harness.getVirtualBufferCaches().get(0)), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                    ioManager.resolveAbsolutePath(harness.getOnDiskDir()));
            break;
        case ONDISK:
            invIndex = InvertedIndexUtils.createOnDiskInvertedIndex(ioManager, harness.getDiskBufferCache(),
                    harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
                    tokenCmpFactories, harness.getInvListsFileRef(), harness.getMetadataPageManagerFactory());
            break;
        case PARTITIONED_ONDISK:
            invIndex = InvertedIndexUtils.createPartitionedOnDiskInvertedIndex(ioManager,
                    harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, harness.getInvListsFileRef(),
                    harness.getMetadataPageManagerFactory());
            break;
        case LSM:
            invIndex = InvertedIndexUtils.createLSMInvertedIndex(ioManager, harness.getVirtualBufferCaches(),
                    harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
                    tokenCmpFactories, tokenizerFactory, harness.getDiskBufferCache(), harness.getOnDiskDir(),
                    harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
                    harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback(),
                    invertedIndexFields, filterTypeTraits, filterCmpFactories, filterFields,
                    filterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, true,
                    harness.getMetadataPageManagerFactory());
            break;
        case PARTITIONED_LSM:
            invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(ioManager,
                    harness.getVirtualBufferCaches(), harness.getDiskFileMapProvider(), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                    harness.getDiskBufferCache(), harness.getOnDiskDir(), harness.getBoomFilterFalsePositiveRate(),
                    harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
                    harness.getIOOperationCallback(), invertedIndexFields, filterTypeTraits, filterCmpFactories,
                    filterFields, filterFieldsForNonBulkLoadOps, invertedIndexFieldsForNonBulkLoadOps, true,
                    harness.getMetadataPageManagerFactory());
            break;
        default:
            throw HyracksDataException.create(ErrorCode.UNKNOWN_INVERTED_INDEX_TYPE, invIndexType);
    }

    // Partitioned index types get the partitioned tuple iterator; the others get the plain one.
    InvertedIndexTokenizingTupleIterator indexTupleIter = null;
    switch (invIndexType) {
        case INMEMORY:
        case ONDISK:
        case LSM: {
            int tokenFields = invIndex.getTokenTypeTraits().length;
            int invListFields = invIndex.getInvListTypeTraits().length;
            indexTupleIter = new InvertedIndexTokenizingTupleIterator(tokenFields, invListFields,
                    tokenizerFactory.createTokenizer());
            break;
        }
        case PARTITIONED_INMEMORY:
        case PARTITIONED_ONDISK:
        case PARTITIONED_LSM: {
            int tokenFields = invIndex.getTokenTypeTraits().length;
            int invListFields = invIndex.getInvListTypeTraits().length;
            indexTupleIter = new PartitionedInvertedIndexTokenizingTupleIterator(tokenFields, invListFields,
                    tokenizerFactory.createTokenizer());
            break;
        }
        default:
            throw HyracksDataException.create(ErrorCode.UNKNOWN_INVERTED_INDEX_TYPE, invIndexType);
    }

    return new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory, invIndexType, indexTupleIter);
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class BTreeResourceFactoryProvider method getTypeTraits.
/**
 * Computes the type traits of the fields stored in the given index.
 * <ul>
 * <li>For the primary index, the dataset's primary type traits are returned as-is.</li>
 * <li>For the files index of an external dataset,
 * {@code FilesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS} is returned.</li>
 * <li>Otherwise the result holds one trait per secondary key followed by the first
 * {@code numPrimaryKeys} primary type traits.</li>
 * </ul>
 *
 * @param metadataProvider
 *            supplies the type trait provider and is used to resolve the primary type traits
 * @param dataset
 *            the dataset the index belongs to
 * @param index
 *            the index whose field type traits are requested
 * @param recordType
 *            the record type of the dataset; source of keys whose source indicator is 0 or absent
 * @param metaType
 *            the meta type of the dataset; source of keys whose source indicator is non-zero
 * @return the type traits of the index fields
 * @throws AlgebricksException
 *             if declared by one of the metadata lookups
 */
private static ITypeTraits[] getTypeTraits(MetadataProvider metadataProvider, Dataset dataset, Index index,
        ARecordType recordType, ARecordType metaType) throws AlgebricksException {
    ITypeTraits[] primaryTypeTraits = dataset.getPrimaryTypeTraits(metadataProvider, recordType, metaType);
    if (index.isPrimaryIndex()) {
        return primaryTypeTraits;
    }
    if (dataset.getDatasetType() == DatasetType.EXTERNAL
            && index.getIndexName().equals(IndexingConstants.getFilesIndexName(dataset.getDatasetName()))) {
        return FilesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS;
    }

    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    List<List<String>> keyFieldNames = index.getKeyFieldNames();
    int numSecondaryKeys = keyFieldNames.size();
    // Loop-invariant: which record (data or meta) each key comes from. May be null.
    List<Integer> keySourceIndicators = index.getKeyFieldSourceIndicators();
    ITypeTraitProvider typeTraitProvider = metadataProvider.getStorageComponentProvider().getTypeTraitProvider();

    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[numSecondaryKeys + numPrimaryKeys];
    for (int i = 0; i < numSecondaryKeys; i++) {
        // A missing indicator list, or an indicator of 0, means the key lives in the data record.
        boolean fromRecord = keySourceIndicators == null || keySourceIndicators.get(i) == 0;
        ARecordType sourceType = fromRecord ? recordType : metaType;
        Pair<IAType, Boolean> keyTypePair = Index.getNonNullableOpenFieldType(index.getKeyFieldTypes().get(i),
                keyFieldNames.get(i), sourceType);
        secondaryTypeTraits[i] = typeTraitProvider.getTypeTrait(keyTypePair.first);
    }
    // Append the type traits of the primary key fields after the secondary keys.
    System.arraycopy(primaryTypeTraits, 0, secondaryTypeTraits, numSecondaryKeys, numPrimaryKeys);
    return secondaryTypeTraits;
}
use of org.apache.hyracks.api.dataflow.value.ITypeTraits in project asterixdb by apache.
the class Dataset method getResourceFactory.
/**
 * Creates the resource factory for a particular index on this dataset.
 * <p>
 * The filter type traits and filter comparator factories are computed from the record type, the
 * request is delegated to the resource factory provider that matches the index type, and the
 * provider's result is wrapped in a {@link DatasetLocalResourceFactory} together with this
 * dataset's id.
 *
 * @param mdProvider
 *            metadata provider to get metadata information, components, and runtimes
 * @param index
 *            the index to get the resource factory for
 * @param recordType
 *            the record type for the dataset
 * @param metaType
 *            the meta type for the dataset
 * @param mergePolicyFactory
 *            the merge policy factory of the dataset
 * @param mergePolicyProperties
 *            the merge policy properties for the dataset
 * @return a {@link DatasetLocalResourceFactory} wrapping the index-type-specific resource factory
 * @throws AlgebricksException
 *             if the resource factory could not be created; an index type that is not handled
 *             here is reported with a {@code CompilationException}
 */
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Index index, ARecordType recordType,
        ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties)
        throws AlgebricksException {
    // Filter metadata is the same regardless of the index type.
    ITypeTraits[] filterTraits = DatasetUtil.computeFilterTypeTraits(this, recordType);
    IBinaryComparatorFactory[] filterComparators = DatasetUtil.computeFilterBinaryComparatorFactories(this,
            recordType, mdProvider.getStorageComponentProvider().getComparatorFactoryProvider());

    // Delegate to the provider responsible for this kind of index.
    IResourceFactory delegate;
    switch (index.getIndexType()) {
        case BTREE:
            delegate = bTreeResourceFactoryProvider.getResourceFactory(mdProvider, this, index, recordType,
                    metaType, mergePolicyFactory, mergePolicyProperties, filterTraits, filterComparators);
            break;
        case RTREE:
            delegate = rTreeResourceFactoryProvider.getResourceFactory(mdProvider, this, index, recordType,
                    metaType, mergePolicyFactory, mergePolicyProperties, filterTraits, filterComparators);
            break;
        case SINGLE_PARTITION_WORD_INVIX:
        case SINGLE_PARTITION_NGRAM_INVIX:
        case LENGTH_PARTITIONED_WORD_INVIX:
        case LENGTH_PARTITIONED_NGRAM_INVIX:
            delegate = invertedIndexResourceFactoryProvider.getResourceFactory(mdProvider, this, index,
                    recordType, metaType, mergePolicyFactory, mergePolicyProperties, filterTraits,
                    filterComparators);
            break;
        default:
            throw new CompilationException(ErrorCode.COMPILATION_UNKNOWN_INDEX_TYPE,
                    index.getIndexType().toString());
    }
    return new DatasetLocalResourceFactory(datasetId, delegate);
}
Aggregations