use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class SecondaryIndexBulkLoadExample method createJob.
private static JobSpecification createJob(Options options) {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// schema of tuples that we are retrieving from the primary index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { // we will use this as payload in secondary index
IntegerSerializerDeserializer.INSTANCE, // we will use this ask key in secondary index
new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparators for sort fields and BTree fields
IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// use a disk-order scan to read primary index
IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec, recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);
// sort the tuples as preparation for bulk load into secondary index
// fields to sort on
int[] sortFields = { 1, 0 };
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
// tuples to be put into B-Tree shall have 2 fields
int secondaryFieldCount = 2;
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
// the B-Tree expects its keyfields to be at the front of its input
// tuple
int[] fieldPermutation = { 1, 0 };
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
// connect the ops
spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
spec.addRoot(nsOpDesc);
return spec;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class PrimaryIndexSearchExample method createJob.
private static JobSpecification createJob(Options options) throws HyracksDataException {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
int fieldCount = 4;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparators for btree
IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// create roviders for B-Tree
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// schema of tuples coming out of primary index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
// build tuple containing low and high search keys
// high
ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2);
// key
// and
// low
// key
DataOutput dos = tb.getDataOutput();
tb.reset();
// low key
IntegerSerializerDeserializer.INSTANCE.serialize(100, dos);
tb.addFieldEndOffset();
// build
IntegerSerializerDeserializer.INSTANCE.serialize(200, dos);
// high key
tb.addFieldEndOffset();
ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
// low key is in field 0 of tuples going
int[] lowKeyFields = { 0 };
// into search op
// low key is in field 1 of tuples going
int[] highKeyFields = { 1 };
// into search op
IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
// have each node print the results of its respective B-Tree
PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, printer, splitNCs);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, btreeSearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), btreeSearchOp, 0, printer, 0);
spec.addRoot(printer);
return spec;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTreeUtils method createLSMTreeWithAntiMatterTuples.
public static LSMRTreeWithAntiMatterTuples createLSMTreeWithAntiMatterTuples(IIOManager ioManager, List<IVirtualBufferCache> virtualBufferCaches, FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeComparatorFactories, IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback, ILinearizeComparatorFactory linearizerCmpFactory, int[] rtreeFields, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields, boolean durable, boolean isPointMBR, IMetadataPageManagerFactory freePageManagerFactory) throws HyracksDataException {
ITreeIndexTupleWriterFactory rtreeInteriorFrameTupleWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, false);
ITreeIndexTupleWriterFactory rtreeLeafFrameTupleWriterFactory;
ITreeIndexTupleWriterFactory rtreeLeafFrameCopyTupleWriterFactory;
ITreeIndexTupleWriterFactory rtreeLeafFrameBulkLoadWriterFactory;
if (isPointMBR) {
int keyFieldCount = rtreeCmpFactories.length;
int valueFieldCount = btreeComparatorFactories.length - keyFieldCount;
rtreeLeafFrameTupleWriterFactory = new LSMRTreeTupleWriterFactoryForPointMBR(typeTraits, keyFieldCount, valueFieldCount, true, false);
rtreeLeafFrameCopyTupleWriterFactory = new LSMRTreeTupleWriterFactoryForPointMBR(typeTraits, keyFieldCount, valueFieldCount, true, false);
rtreeLeafFrameBulkLoadWriterFactory = new LSMRTreeTupleWriterFactoryForPointMBR(typeTraits, keyFieldCount, valueFieldCount, true, false);
} else {
rtreeLeafFrameTupleWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, false);
rtreeLeafFrameCopyTupleWriterFactory = new LSMRTreeCopyTupleWriterFactory(typeTraits);
rtreeLeafFrameBulkLoadWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, false);
}
LSMRTreeTupleWriterFactory btreeTupleWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, true);
ITreeIndexFrameFactory rtreeInteriorFrameFactory = new RTreeNSMInteriorFrameFactory(rtreeInteriorFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory rtreeLeafFrameFactory = new RTreeNSMLeafFrameFactory(rtreeLeafFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory rtreeLeafFrameBulkLoadFactory = new RTreeNSMLeafFrameFactory(rtreeLeafFrameBulkLoadWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory btreeInteriorFrameFactory = new BTreeNSMInteriorFrameFactory(btreeTupleWriterFactory);
ITreeIndexFrameFactory btreeLeafFrameFactory = new BTreeNSMLeafFrameFactory(btreeTupleWriterFactory);
ITreeIndexFrameFactory copyTupleLeafFrameFactory = new RTreeNSMLeafFrameFactory(rtreeLeafFrameCopyTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
TreeIndexFactory<RTree> diskRTreeFactory = new RTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, rtreeInteriorFrameFactory, copyTupleLeafFrameFactory, rtreeCmpFactories, typeTraits.length, isPointMBR);
TreeIndexFactory<RTree> bulkLoadRTreeFactory = new RTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, rtreeInteriorFrameFactory, rtreeLeafFrameBulkLoadFactory, rtreeCmpFactories, typeTraits.length, isPointMBR);
// The first field is for the sorted curve (e.g. Hilbert curve), and the
// second field is for the primary key.
int[] comparatorFields = new int[btreeComparatorFactories.length - rtreeCmpFactories.length + 1];
IBinaryComparatorFactory[] linearizerArray = new IBinaryComparatorFactory[btreeComparatorFactories.length - rtreeCmpFactories.length + 1];
comparatorFields[0] = 0;
for (int i = 1; i < comparatorFields.length; i++) {
comparatorFields[i] = rtreeCmpFactories.length - 1 + i;
}
linearizerArray[0] = linearizerCmpFactory;
int j = 1;
for (int i = rtreeCmpFactories.length; i < btreeComparatorFactories.length; i++) {
linearizerArray[j] = btreeComparatorFactories[i];
j++;
}
ComponentFilterHelper filterHelper = null;
LSMComponentFilterFrameFactory filterFrameFactory = null;
LSMComponentFilterManager filterManager = null;
if (filterCmpFactories != null) {
TypeAwareTupleWriterFactory filterTupleWriterFactory = new TypeAwareTupleWriterFactory(filterTypeTraits);
filterHelper = new ComponentFilterHelper(filterTupleWriterFactory, filterCmpFactories);
filterFrameFactory = new LSMComponentFilterFrameFactory(filterTupleWriterFactory);
filterManager = new LSMComponentFilterManager(filterFrameFactory);
}
ILSMIndexFileManager fileNameManager = new LSMRTreeWithAntiMatterTuplesFileManager(ioManager, diskFileMapProvider, file, diskRTreeFactory);
return new LSMRTreeWithAntiMatterTuples(ioManager, virtualBufferCaches, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory, bulkLoadRTreeFactory, filterHelper, filterFrameFactory, filterManager, diskFileMapProvider, typeTraits.length, rtreeCmpFactories, btreeComparatorFactories, linearizerCmpFactory, comparatorFields, linearizerArray, mergePolicy, opTracker, ioScheduler, ioOpCallback, rtreeFields, filterFields, durable, isPointMBR);
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTreeUtils method createExternalRTree.
public static ExternalRTree createExternalRTree(IIOManager ioManager, FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback, ILinearizeComparatorFactory linearizeCmpFactory, int[] buddyBTreeFields, boolean durable, boolean isPointMBR, IMetadataPageManagerFactory freePageManagerFactory) throws HyracksDataException {
int keyFieldCount = rtreeCmpFactories.length;
int valueFieldCount = typeTraits.length - keyFieldCount;
ITypeTraits[] btreeTypeTraits = new ITypeTraits[valueFieldCount];
for (int i = 0; i < buddyBTreeFields.length; i++) {
btreeTypeTraits[i] = typeTraits[buddyBTreeFields[i]];
}
ITreeIndexTupleWriterFactory rtreeInteriorFrameTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(typeTraits, false);
ITreeIndexTupleWriterFactory rtreeLeafFrameTupleWriterFactory = null;
if (isPointMBR) {
rtreeLeafFrameTupleWriterFactory = new LSMRTreeTupleWriterFactoryForPointMBR(typeTraits, keyFieldCount, valueFieldCount, false, false);
} else {
rtreeLeafFrameTupleWriterFactory = rtreeInteriorFrameTupleWriterFactory;
}
ITreeIndexTupleWriterFactory btreeTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(btreeTypeTraits, true);
ITreeIndexFrameFactory rtreeInteriorFrameFactory = new RTreeNSMInteriorFrameFactory(rtreeInteriorFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory rtreeLeafFrameFactory = new RTreeNSMLeafFrameFactory(rtreeLeafFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory btreeInteriorFrameFactory = new BTreeNSMInteriorFrameFactory(btreeTupleWriterFactory);
ITreeIndexFrameFactory btreeLeafFrameFactory = new BTreeNSMLeafFrameFactory(btreeTupleWriterFactory);
TreeIndexFactory<RTree> diskRTreeFactory = new RTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, rtreeCmpFactories, typeTraits.length, isPointMBR);
TreeIndexFactory<BTree> diskBTreeFactory = new BTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, btreeCmpFactories, btreeTypeTraits.length);
int[] comparatorFields = { 0 };
IBinaryComparatorFactory[] linearizerArray = { linearizeCmpFactory };
int[] bloomFilterKeyFields = new int[btreeCmpFactories.length];
for (int i = 0; i < btreeCmpFactories.length; i++) {
bloomFilterKeyFields[i] = i;
}
BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider, bloomFilterKeyFields);
ILSMIndexFileManager fileNameManager = new LSMRTreeFileManager(ioManager, diskFileMapProvider, file, diskRTreeFactory, diskBTreeFactory);
ExternalRTree lsmTree = new ExternalRTree(ioManager, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory, diskBTreeFactory, bloomFilterFactory, bloomFilterFalsePositiveRate, diskFileMapProvider, typeTraits.length, rtreeCmpFactories, btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray, mergePolicy, opTracker, ioScheduler, ioOpCallback, buddyBTreeFields, durable, isPointMBR);
return lsmTree;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTreeUtils method createLSMTree.
public static LSMRTree createLSMTree(IIOManager ioManager, List<IVirtualBufferCache> virtualBufferCaches, FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback, ILinearizeComparatorFactory linearizeCmpFactory, int[] rtreeFields, int[] buddyBTreeFields, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields, boolean durable, boolean isPointMBR, IMetadataPageManagerFactory freePageManagerFactory) throws HyracksDataException {
int valueFieldCount = buddyBTreeFields.length;
int keyFieldCount = typeTraits.length - valueFieldCount;
ITypeTraits[] btreeTypeTraits = new ITypeTraits[valueFieldCount];
for (int i = 0; i < valueFieldCount; i++) {
btreeTypeTraits[i] = typeTraits[buddyBTreeFields[i]];
}
ITreeIndexTupleWriterFactory rtreeInteriorFrameTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(typeTraits, false);
ITreeIndexTupleWriterFactory rtreeLeafFrameTupleWriterFactory = null;
if (isPointMBR) {
rtreeLeafFrameTupleWriterFactory = new LSMRTreeTupleWriterFactoryForPointMBR(typeTraits, keyFieldCount, valueFieldCount, false, false);
} else {
rtreeLeafFrameTupleWriterFactory = rtreeInteriorFrameTupleWriterFactory;
}
ITreeIndexTupleWriterFactory btreeTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(btreeTypeTraits, true);
ITreeIndexFrameFactory rtreeInteriorFrameFactory = new RTreeNSMInteriorFrameFactory(rtreeInteriorFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory rtreeLeafFrameFactory = new RTreeNSMLeafFrameFactory(rtreeLeafFrameTupleWriterFactory, valueProviderFactories, rtreePolicyType, isPointMBR);
ITreeIndexFrameFactory btreeInteriorFrameFactory = new BTreeNSMInteriorFrameFactory(btreeTupleWriterFactory);
ITreeIndexFrameFactory btreeLeafFrameFactory = new BTreeNSMLeafFrameFactory(btreeTupleWriterFactory);
TreeIndexFactory<RTree> diskRTreeFactory = new RTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, rtreeCmpFactories, typeTraits.length, isPointMBR);
TreeIndexFactory<BTree> diskBTreeFactory = new BTreeFactory(ioManager, diskBufferCache, diskFileMapProvider, freePageManagerFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, btreeCmpFactories, btreeTypeTraits.length);
int[] comparatorFields = { 0 };
IBinaryComparatorFactory[] linearizerArray = { linearizeCmpFactory };
int[] bloomFilterKeyFields = new int[btreeCmpFactories.length];
for (int i = 0; i < btreeCmpFactories.length; i++) {
bloomFilterKeyFields[i] = i;
}
BloomFilterFactory bloomFilterFactory = new BloomFilterFactory(diskBufferCache, diskFileMapProvider, bloomFilterKeyFields);
ComponentFilterHelper filterHelper = null;
LSMComponentFilterFrameFactory filterFrameFactory = null;
LSMComponentFilterManager filterManager = null;
if (filterCmpFactories != null) {
TypeAwareTupleWriterFactory filterTupleWriterFactory = new TypeAwareTupleWriterFactory(filterTypeTraits);
filterHelper = new ComponentFilterHelper(filterTupleWriterFactory, filterCmpFactories);
filterFrameFactory = new LSMComponentFilterFrameFactory(filterTupleWriterFactory);
filterManager = new LSMComponentFilterManager(filterFrameFactory);
}
ILSMIndexFileManager fileNameManager = new LSMRTreeFileManager(ioManager, diskFileMapProvider, file, diskRTreeFactory, diskBTreeFactory);
return new LSMRTree(ioManager, virtualBufferCaches, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory, diskBTreeFactory, bloomFilterFactory, filterHelper, filterFrameFactory, filterManager, bloomFilterFalsePositiveRate, diskFileMapProvider, typeTraits.length, rtreeCmpFactories, btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray, mergePolicy, opTracker, ioScheduler, ioOpCallback, rtreeFields, buddyBTreeFields, filterFields, durable, isPointMBR);
}
Aggregations