use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class Groupby method createJob.
private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, int htSize, long fileSize, int frameLimit, int frameSize, String alg, boolean outPlain) {
JobSpecification spec = new JobSpecification(frameSize);
IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new DelimitedDataTupleParserFactory(lineitemParserFactories, '|'), lineitemDesc);
createPartitionConstraint(spec, fileScanner, inSplits);
// Output: each unique string with an integer count
RecordDescriptor outDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, // IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE });
// Specify the grouping key, which will be the string extracted during
// the scan.
int[] keys = new int[] { 0 };
AbstractOperatorDescriptor grouper;
if (alg.equalsIgnoreCase("hash")) {
// external hash graph
grouper = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new IntegerNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, false) }), outDesc, outDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { MurmurHash3BinaryHashFunctionFamily.INSTANCE }));
createPartitionConstraint(spec, grouper, outSplits);
} else if (alg.equalsIgnoreCase("sort")) {
grouper = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keys, new IntegerNormalizedKeyComputerFactory(), new IBinaryComparatorFactory[] { // PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY),
PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, true) }), outDesc, outDesc, false);
createPartitionConstraint(spec, grouper, outSplits);
} else {
System.err.println("unknow groupby alg:" + alg);
return null;
}
// Connect scanner with the grouper
IConnectorDescriptor scanGroupConnDef2 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { // PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY),
PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
spec.connect(scanGroupConnDef2, fileScanner, 0, grouper, 0);
IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
AbstractSingleActivityOperatorDescriptor writer = outPlain ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|") : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
createPartitionConstraint(spec, writer, outSplits);
IConnectorDescriptor groupOutConn = new OneToOneConnectorDescriptor(spec);
spec.connect(groupOutConn, grouper, 0, writer, 0);
spec.addRoot(writer);
return spec;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTreeTestContext method create.
public static LSMRTreeTestContext create(IIOManager ioManager, List<IVirtualBufferCache> virtualBufferCaches, FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ISerializerDeserializer[] fieldSerdes, IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeyFields, RTreePolicyType rtreePolicyType, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback, IMetadataPageManagerFactory metadataPageManagerFactory) throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] rtreeCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeyFields);
int numBtreeFields = fieldSerdes.length - numKeyFields;
ISerializerDeserializer[] btreeFieldSerdes = new ISerializerDeserializer[numBtreeFields];
int[] btreeFields = new int[numBtreeFields];
for (int i = 0; i < numBtreeFields; i++) {
btreeFields[i] = numKeyFields + i;
btreeFieldSerdes[i] = fieldSerdes[numKeyFields + i];
}
IBinaryComparatorFactory[] btreeCmpFactories = SerdeUtils.serdesToComparatorFactories(btreeFieldSerdes, numBtreeFields);
LSMRTree lsmTree = LSMRTreeUtils.createLSMTree(ioManager, virtualBufferCaches, file, diskBufferCache, diskFileMapProvider, typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType, bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallback, LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length), null, btreeFields, null, null, null, true, false, metadataPageManagerFactory);
LSMRTreeTestContext testCtx = new LSMRTreeTestContext(fieldSerdes, lsmTree);
return testCtx;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTreeWithAntiMatterTuplesTestContext method create.
public static LSMRTreeWithAntiMatterTuplesTestContext create(IIOManager ioManager, List<IVirtualBufferCache> virtualBufferCaches, FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ISerializerDeserializer[] fieldSerdes, IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeyFields, RTreePolicyType rtreePolicyType, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback, IMetadataPageManagerFactory metadataPageManagerFactory) throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] rtreeCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeyFields);
IBinaryComparatorFactory[] btreeCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, fieldSerdes.length);
LSMRTreeWithAntiMatterTuples lsmTree = LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(ioManager, virtualBufferCaches, file, diskBufferCache, diskFileMapProvider, typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType, mergePolicy, opTracker, ioScheduler, ioOpCallback, LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length), null, null, null, null, true, false, metadataPageManagerFactory);
LSMRTreeWithAntiMatterTuplesTestContext testCtx = new LSMRTreeWithAntiMatterTuplesTestContext(fieldSerdes, lsmTree);
return testCtx;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class RTreeSearchCursorTest method rangeSearchTest.
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void rangeSearchTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING RANGE SEARCH CURSOR FOR RTREE");
}
IBufferCache bufferCache = harness.getBufferCache();
// Declare fields.
int fieldCount = 5;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = IntegerPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
typeTraits[3] = IntegerPointable.TYPE_TRAITS;
typeTraits[4] = IntegerPointable.TYPE_TRAITS;
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
// Declare keys.
int keyFieldCount = 4;
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[2] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[3] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// create value providers
IPrimitiveValueProviderFactory[] valueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(cmpFactories.length, IntegerPointable.FACTORY);
RTreeTypeAwareTupleWriterFactory tupleWriterFactory = new RTreeTypeAwareTupleWriterFactory(typeTraits);
ITreeIndexMetadataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
ITreeIndexFrameFactory interiorFrameFactory = new RTreeNSMInteriorFrameFactory(tupleWriterFactory, valueProviderFactories, RTreePolicyType.RTREE, false);
ITreeIndexFrameFactory leafFrameFactory = new RTreeNSMLeafFrameFactory(tupleWriterFactory, valueProviderFactories, RTreePolicyType.RTREE, false);
IRTreeInteriorFrame interiorFrame = (IRTreeInteriorFrame) interiorFrameFactory.createFrame();
IRTreeLeafFrame leafFrame = (IRTreeLeafFrame) leafFrameFactory.createFrame();
IMetadataPageManager freePageManager = new LinkedMetaDataPageManager(bufferCache, metaFrameFactory);
RTree rtree = new RTree(bufferCache, harness.getFileMapProvider(), freePageManager, interiorFrameFactory, leafFrameFactory, cmpFactories, fieldCount, harness.getFileReference(), false);
rtree.create();
rtree.activate();
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
ITreeIndexAccessor indexAccessor = rtree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
int numInserts = 10000;
ArrayList<RTreeCheckTuple> checkTuples = new ArrayList<>();
for (int i = 0; i < numInserts; i++) {
int p1x = rnd.nextInt();
int p1y = rnd.nextInt();
int p2x = rnd.nextInt();
int p2y = rnd.nextInt();
int pk = rnd.nextInt();
;
TupleUtils.createIntegerTuple(tb, tuple, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x), Math.max(p1y, p2y), pk);
try {
indexAccessor.insert(tuple);
} catch (HyracksDataException e) {
if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
throw e;
}
}
RTreeCheckTuple checkTuple = new RTreeCheckTuple(fieldCount, keyFieldCount);
checkTuple.appendField(Math.min(p1x, p2x));
checkTuple.appendField(Math.min(p1y, p2y));
checkTuple.appendField(Math.max(p1x, p2x));
checkTuple.appendField(Math.max(p1y, p2y));
checkTuple.appendField(pk);
checkTuples.add(checkTuple);
}
// Build key.
ArrayTupleBuilder keyTb = new ArrayTupleBuilder(keyFieldCount);
ArrayTupleReference key = new ArrayTupleReference();
TupleUtils.createIntegerTuple(keyTb, key, -1000, -1000, 1000, 1000);
MultiComparator cmp = MultiComparator.create(cmpFactories);
ITreeIndexCursor searchCursor = new RTreeSearchCursor(interiorFrame, leafFrame);
SearchPredicate searchPredicate = new SearchPredicate(key, cmp);
RTreeCheckTuple keyCheck = (RTreeCheckTuple) rTreeTestUtils.createCheckTupleFromTuple(key, fieldSerdes, keyFieldCount);
HashMultiSet<RTreeCheckTuple> expectedResult = rTreeTestUtils.getRangeSearchExpectedResults(checkTuples, keyCheck);
rTreeTestUtils.getRangeSearchExpectedResults(checkTuples, keyCheck);
indexAccessor.search(searchCursor, searchPredicate);
rTreeTestUtils.checkExpectedResults(searchCursor, expectedResult, fieldSerdes, keyFieldCount, null);
rtree.deactivate();
rtree.destroy();
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class BTreeStatsTest method test01.
@Test
public void test01() throws Exception {
TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES, MAX_OPEN_FILES);
IBufferCache bufferCache = harness.getBufferCache();
IFileMapProvider fmp = harness.getFileMapProvider();
// declare fields
int fieldCount = 2;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = IntegerPointable.TYPE_TRAITS;
// declare keys
int keyFieldCount = 1;
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(tupleWriterFactory);
ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
ITreeIndexMetadataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) leafFrameFactory.createFrame();
IBTreeInteriorFrame interiorFrame = (IBTreeInteriorFrame) interiorFrameFactory.createFrame();
ITreeIndexMetadataFrame metaFrame = metaFrameFactory.createFrame();
IMetadataPageManager freePageManager = new LinkedMetaDataPageManager(bufferCache, metaFrameFactory);
BTree btree = new BTree(bufferCache, fmp, freePageManager, interiorFrameFactory, leafFrameFactory, cmpFactories, fieldCount, harness.getFileReference());
btree.create();
btree.activate();
Random rnd = new Random();
rnd.setSeed(50);
long start = System.currentTimeMillis();
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("INSERTING INTO TREE");
}
IFrame frame = new VSizeFrame(ctx);
FrameTupleAppender appender = new FrameTupleAppender();
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
DataOutput dos = tb.getDataOutput();
ISerializerDeserializer[] recDescSers = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
RecordDescriptor recDesc = new RecordDescriptor(recDescSers);
IFrameTupleAccessor accessor = new FrameTupleAccessor(recDesc);
accessor.reset(frame.getBuffer());
FrameTupleReference tuple = new FrameTupleReference();
ITreeIndexAccessor indexAccessor = btree.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
// 10000
for (int i = 0; i < 100000; i++) {
int f0 = rnd.nextInt() % 100000;
int f1 = 5;
tb.reset();
IntegerSerializerDeserializer.INSTANCE.serialize(f0, dos);
tb.addFieldEndOffset();
IntegerSerializerDeserializer.INSTANCE.serialize(f1, dos);
tb.addFieldEndOffset();
appender.reset(frame, true);
appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
tuple.reset(accessor, 0);
if (LOGGER.isLoggable(Level.INFO)) {
if (i % 10000 == 0) {
long end = System.currentTimeMillis();
LOGGER.info("INSERTING " + i + " : " + f0 + " " + f1 + " " + (end - start));
}
}
try {
indexAccessor.insert(tuple);
} catch (HyracksDataException e) {
if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
e.printStackTrace();
throw e;
}
}
}
int fileId = fmp.lookupFileId(harness.getFileReference());
TreeIndexStatsGatherer statsGatherer = new TreeIndexStatsGatherer(bufferCache, freePageManager, fileId, btree.getRootPageId());
TreeIndexStats stats = statsGatherer.gatherStats(leafFrame, interiorFrame, metaFrame);
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("\n" + stats.toString());
}
TreeIndexBufferCacheWarmup bufferCacheWarmup = new TreeIndexBufferCacheWarmup(bufferCache, freePageManager, fileId);
bufferCacheWarmup.warmup(leafFrame, metaFrame, new int[] { 1, 2 }, new int[] { 2, 5 });
btree.deactivate();
btree.destroy();
bufferCache.close();
}
Aggregations