use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class HeapSortMergeTest method createSortMergeJobSpec.
public static JobSpecification createSortMergeJobSpec() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"), new ManagedFileSplit(NC2_ID, "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
int outputLimit = 20;
TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit, new int[] { 1, 0 }, null, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
LimitOperatorDescriptor filter = new LimitOperatorDescriptor(spec, ordersDesc, outputLimit);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, filter, NC1_ID);
ResultSetId rsId = new ResultSetId(1);
spec.addResultSetId(rsId);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1, 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY), PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), new int[] { 1, 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, filter, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), filter, 0, printer, 0);
return spec;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class AbstractRTreeOperatorTest method setup.
@Before
public void setup() throws Exception {
testHelper = createTestHelper();
primaryFileName = testHelper.getPrimaryIndexName();
primarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, primaryFileName) });
primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
secondaryFileName = testHelper.getSecondaryIndexName();
secondarySplitProvider = new ConstantFileSplitProvider(new FileSplit[] { new ManagedFileSplit(NC1_ID, secondaryFileName) });
secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
// field, type and key declarations for primary index
primaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[5] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[6] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[7] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[8] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[9] = UTF8StringPointable.TYPE_TRAITS;
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
// field, type and key declarations for secondary indexes
secondaryTypeTraits[0] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[1] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[2] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[3] = DoublePointable.TYPE_TRAITS;
secondaryTypeTraits[4] = UTF8StringPointable.TYPE_TRAITS;
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
secondaryComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
// This only used for LSMRTree
int[] rtreeFields = null;
int[] filterFields = null;
ITypeTraits[] filterTypes = null;
IBinaryComparatorFactory[] filterCmpFactories = null;
if (rTreeType == RTreeType.LSMRTREE || rTreeType == RTreeType.LSMRTREE_WITH_ANTIMATTER) {
rtreeFields = new int[] { 0, 1, 2, 3, 4 };
filterFields = new int[] { 4 };
filterTypes = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
filterCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
}
int[] btreeFields = null;
if (rTreeType == RTreeType.LSMRTREE) {
btreeKeyFieldCount = 1;
btreeComparatorFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
btreeFields = new int[] { 4 };
} else {
btreeComparatorFactories[0] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[1] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[2] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[3] = PointableBinaryComparatorFactory.of(DoublePointable.FACTORY);
btreeComparatorFactories[4] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
}
IPrimitiveValueProviderFactory[] secondaryValueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(secondaryComparatorFactories.length, DoublePointable.FACTORY);
rtreeFactory = createSecondaryResourceFactory(secondaryValueProviderFactories, RTreePolicyType.RSTARTREE, btreeComparatorFactories, LSMRTreeUtils.proposeBestLinearizer(secondaryTypeTraits, secondaryComparatorFactories.length), btreeFields);
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class LSMRTree method flush.
@Override
public ILSMDiskComponent flush(ILSMIOOperation operation) throws HyracksDataException {
LSMRTreeFlushOperation flushOp = (LSMRTreeFlushOperation) operation;
LSMRTreeMemoryComponent flushingComponent = (LSMRTreeMemoryComponent) flushOp.getFlushingComponent();
// Renaming order is critical because we use assume ordering when we
// read the file names when we open the tree.
// The RTree should be renamed before the BTree.
// scan the memory RTree
ITreeIndexAccessor memRTreeAccessor = flushingComponent.getRTree().createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
RTreeSearchCursor rtreeScanCursor = (RTreeSearchCursor) memRTreeAccessor.createSearchCursor(false);
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
LSMRTreeDiskComponent component = createDiskComponent(componentFactory, flushOp.getTarget(), flushOp.getBTreeTarget(), flushOp.getBloomFilterTarget(), true);
//count the number of tuples in the buddy btree
ITreeIndexAccessor memBTreeAccessor = flushingComponent.getBTree().createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
RangePredicate btreeNullPredicate = new RangePredicate(null, null, true, true, null, null);
IIndexCursor btreeCountingCursor = ((BTreeAccessor) memBTreeAccessor).createCountingSearchCursor();
memBTreeAccessor.search(btreeCountingCursor, btreeNullPredicate);
long numBTreeTuples = 0L;
try {
while (btreeCountingCursor.hasNext()) {
btreeCountingCursor.next();
ITupleReference countTuple = btreeCountingCursor.getTuple();
numBTreeTuples = IntegerPointable.getInteger(countTuple.getFieldData(0), countTuple.getFieldStart(0));
}
} finally {
btreeCountingCursor.close();
}
ILSMDiskComponentBulkLoader componentBulkLoader = createComponentBulkLoader(component, 1.0f, false, numBTreeTuples, false, false);
ITreeIndexCursor cursor;
IBinaryComparatorFactory[] linearizerArray = { linearizer };
TreeTupleSorter rTreeTupleSorter = new TreeTupleSorter(flushingComponent.getRTree().getFileId(), linearizerArray, rtreeLeafFrameFactory.createFrame(), rtreeLeafFrameFactory.createFrame(), flushingComponent.getRTree().getBufferCache(), comparatorFields);
// BulkLoad the tuples from the in-memory tree into the new disk
// RTree.
boolean isEmpty = true;
try {
while (rtreeScanCursor.hasNext()) {
isEmpty = false;
rtreeScanCursor.next();
rTreeTupleSorter.insertTupleEntry(rtreeScanCursor.getPageId(), rtreeScanCursor.getTupleOffset());
}
} finally {
rtreeScanCursor.close();
}
rTreeTupleSorter.sort();
cursor = rTreeTupleSorter;
if (!isEmpty) {
try {
while (cursor.hasNext()) {
cursor.next();
ITupleReference frameTuple = cursor.getTuple();
componentBulkLoader.add(frameTuple);
}
} finally {
cursor.close();
}
}
// scan the memory BTree
IIndexCursor btreeScanCursor = memBTreeAccessor.createSearchCursor(false);
memBTreeAccessor.search(btreeScanCursor, btreeNullPredicate);
try {
while (btreeScanCursor.hasNext()) {
btreeScanCursor.next();
ITupleReference frameTuple = btreeScanCursor.getTuple();
componentBulkLoader.delete(frameTuple);
}
} finally {
btreeScanCursor.close();
}
if (component.getLSMComponentFilter() != null) {
List<ITupleReference> filterTuples = new ArrayList<>();
filterTuples.add(flushingComponent.getLSMComponentFilter().getMinTuple());
filterTuples.add(flushingComponent.getLSMComponentFilter().getMaxTuple());
getFilterManager().updateFilter(component.getLSMComponentFilter(), filterTuples);
getFilterManager().writeFilter(component.getLSMComponentFilter(), component.getRTree());
}
// Note. If we change the filter to write to metadata object, we don't need the if block above
flushingComponent.getMetadata().copy(component.getMetadata());
componentBulkLoader.end();
return component;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class OrderedIndexExamplesTest method bulkLoadExample.
/**
* Bulk load example. Load a tree with 100,000 tuples. BTree has a composite
* key to "simulate" non-unique index creation.
*/
@Test
public void bulkLoadExample() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Bulk load example");
}
// Declare fields.
int fieldCount = 3;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = IntegerPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
// declare keys
int keyFieldCount = 2;
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// This is only used for the LSM-BTree.
int[] bloomFilterKeyFields = new int[keyFieldCount];
bloomFilterKeyFields[0] = 0;
bloomFilterKeyFields[1] = 1;
ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
treeIndex.create();
treeIndex.activate();
// Load sorted records.
int ins = 100000;
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Bulk loading " + ins + " tuples");
}
long start = System.currentTimeMillis();
IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, ins, true);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
for (int i = 0; i < ins; i++) {
TupleUtils.createIntegerTuple(tb, tuple, i, i, 5);
bulkLoader.add(tuple);
}
bulkLoader.end();
long end = System.currentTimeMillis();
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(ins + " tuples loaded in " + (end - start) + "ms");
}
IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
// Build low key.
ArrayTupleBuilder lowKeyTb = new ArrayTupleBuilder(1);
ArrayTupleReference lowKey = new ArrayTupleReference();
TupleUtils.createIntegerTuple(lowKeyTb, lowKey, 44444);
// Build high key.
ArrayTupleBuilder highKeyTb = new ArrayTupleBuilder(1);
ArrayTupleReference highKey = new ArrayTupleReference();
TupleUtils.createIntegerTuple(highKeyTb, highKey, 44500);
// Prefix-Range search in [44444, 44500]
rangeSearch(cmpFactories, indexAccessor, fieldSerdes, lowKey, highKey, null, null);
treeIndex.validate();
treeIndex.deactivate();
treeIndex.destroy();
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory in project asterixdb by apache.
the class OrderedIndexExamplesTest method deleteExample.
/**
* Deletion Example. Create a BTree with one variable-length key field and
* one variable-length value field. Fill B-tree with random values using
* insertions, then delete entries one-by-one. Repeat procedure a few times
* on same BTree.
*/
@Test
public void deleteExample() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Deletion Example");
}
// Declare fields.
int fieldCount = 2;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
// This is only used for the LSM-BTree.
int[] bloomFilterKeyFields = new int[keyFieldCount];
bloomFilterKeyFields[0] = 0;
ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
treeIndex.create();
treeIndex.activate();
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
// Max string length to be generated.
int runs = 3;
for (int run = 0; run < runs; run++) {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Deletion example run: " + (run + 1) + "/" + runs);
LOGGER.info("Inserting into tree...");
}
int maxLength = 10;
int ins = 10000;
String[] f0s = new String[ins];
String[] f1s = new String[ins];
int insDone = 0;
int[] insDoneCmp = new int[ins];
for (int i = 0; i < ins; i++) {
String f0 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
String f1 = randomString(Math.abs(rnd.nextInt()) % maxLength + 1, rnd);
TupleUtils.createTuple(tb, tuple, fieldSerdes, f0, f1);
f0s[i] = f0;
f1s[i] = f1;
if (LOGGER.isLoggable(Level.INFO)) {
if (i % 1000 == 0) {
LOGGER.info("Inserting " + i);
}
}
try {
indexAccessor.insert(tuple);
insDone++;
} catch (HyracksDataException e) {
if (e.getErrorCode() != ErrorCode.DUPLICATE_KEY) {
throw e;
}
}
insDoneCmp[i] = insDone;
}
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Deleting from tree...");
}
int delDone = 0;
for (int i = 0; i < ins; i++) {
TupleUtils.createTuple(tb, tuple, fieldSerdes, f0s[i], f1s[i]);
if (LOGGER.isLoggable(Level.INFO)) {
if (i % 1000 == 0) {
LOGGER.info("Deleting " + i);
}
}
try {
indexAccessor.delete(tuple);
delDone++;
} catch (HyracksDataException e) {
if (e.getErrorCode() != ErrorCode.UPDATE_OR_DELETE_NON_EXISTENT_KEY) {
throw e;
}
}
if (insDoneCmp[i] != delDone) {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("INCONSISTENT STATE, ERROR IN DELETION EXAMPLE.");
LOGGER.info("INSDONECMP: " + insDoneCmp[i] + " " + delDone);
}
break;
}
}
if (insDone != delDone) {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("ERROR! INSDONE: " + insDone + " DELDONE: " + delDone);
}
break;
}
}
treeIndex.validate();
treeIndex.deactivate();
treeIndex.destroy();
}
Aggregations