use of com.yahoo.document.BucketId in project vespa by vespa-engine.
the class ComparisonNode method compare.
/**
 * Compares a search column node with a literal node.
 *
 * <p>Only {@link Long} literals are supported. For such a literal, every
 * {@code BucketId(16, i)} with {@code i} in {@code [0, 2^16)} is tested, and the
 * ids whose column (as reported by the node's distribution) equals the literal
 * are collected.
 *
 * @param factory The bucket id factory used (not consulted by this comparison).
 * @param node The search column node.
 * @param literal The literal node to compare to.
 * @return The bucket set containing the buckets covered, or {@code null} if the
 *         literal value is not a {@link Long}.
 */
private BucketSet compare(BucketIdFactory factory, SearchColumnNode node, LiteralNode literal) {
    Object value = literal.getValue();
    if (!(value instanceof Long)) {
        // Only Long literals can be matched against a column.
        return null;
    }
    // Unbox once rather than on every one of the 65536 iterations.
    final long wantedColumn = (Long) value;
    // Integer shift instead of a floating-point pow() followed by a cast.
    final int bucketCount = 1 << 16;
    BucketSet ret = new BucketSet();
    for (int i = 0; i < bucketCount; i++) {
        BucketId id = new BucketId(16, i);
        if (wantedColumn == node.getDistribution().getColumn(id)) {
            // Reuse the id that was just tested instead of allocating an identical one.
            ret.add(id);
        }
    }
    return ret;
}
use of com.yahoo.document.BucketId in project vespa by vespa-engine.
the class BucketIdFactoryTestCase method testBucketIdSerializationAndCompare.
/**
 * Checks that a bucket id can be rebuilt from its own string form, and that
 * compareTo yields 0 against an identically constructed bucket id.
 */
public void testBucketIdSerializationAndCompare() {
    BucketId original = new BucketId(18, 0x123456789L);

    // Round trip through the string representation.
    BucketId reparsed = new BucketId(original.toString());
    assertEquals(original, reparsed);

    // An id built from the same arguments must compare as equal.
    BucketId twin = new BucketId(18, 0x123456789L);
    assertEquals(0, original.compareTo(twin));
}
use of com.yahoo.document.BucketId in project vespa by vespa-engine.
the class VisitorIteratorTestCase method testRangeDistributionIncreaseMultipleBits.
/**
 * Raises the distribution bit count of a range-based visitor iterator from 16 to 20
 * while one bucket is still active. Checks that the progress token keeps its old
 * bit count until the active bucket is finished, and that the finished/pending
 * counts and the bucket cursor are then scaled by 16 (one db:16 bucket equals
 * 16 db:20 buckets).
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testRangeDistributionIncreaseMultipleBits() throws ParseException {
    int db = 16;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, db, p);
    // For this test, have 3 finished buckets, 2 pending and 1 active
    for (int i = 0; i < 3; ++i) {
        iter.update(iter.getNext().getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    }
    VisitorIterator.BucketProgress[] bpp = new VisitorIterator.BucketProgress[2];
    bpp[0] = iter.getNext();
    bpp[1] = iter.getNext();
    // Leave this hanging as active
    VisitorIterator.BucketProgress bpa = iter.getNext();
    iter.update(bpp[0].getSuperbucket(), new BucketId());
    iter.update(bpp[1].getSuperbucket(), new BucketId());
    iter.setDistributionBitCount(20);
    // ProgressToken doesn't change yet, since it had active buckets
    assertEquals(16, p.getDistributionBitCount());
    assertEquals(20, iter.getDistributionBitCount());
    assertEquals(20, iter.getBucketSource().getDistributionBitCount());
    assertFalse(iter.hasNext());
    assertFalse(iter.isDone());
    assertTrue(iter.getBucketSource().shouldYield());
    assertEquals(2, p.getPendingBucketCount());
    assertEquals(1, p.getActiveBucketCount());
    // Finish active, triggering the consistency fixes
    iter.update(bpa.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertEquals(20, p.getDistributionBitCount());
    assertEquals(32, p.getPendingBucketCount());
    assertEquals(0, p.getActiveBucketCount());
    // Each bucket with db:16 becomes equal to 16 buckets with db:20, so
    // the bucket space position must be 16 * 6 = 96
    assertEquals(96, p.getBucketCursor());
    // Each finished bucket also covers less ground, so count is upped
    // accordingly when using 20 distribution bits
    assertEquals(16 * 4, p.getFinishedBucketCount());
    // The 32 pending buckets are expected at positions 48..79 of the db:20 bucket space
    for (int i = 0; i < 32; ++i) {
        long testKey = ProgressToken.makeNthBucketKey(i + 48, 20);
        VisitorIterator.BucketProgress bp = iter.getNext();
        assertEquals(new BucketId(ProgressToken.keyToBucketId(testKey)), bp.getSuperbucket());
        iter.update(bp.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    }
    assertEquals(0, p.getPendingBucketCount());
    assertEquals(16 * 6, p.getFinishedBucketCount());
    // Bucket source should now begin returning from bucket 20:0x6000
    assertEquals(new BucketId(20, 0x6000), iter.getNext().getSuperbucket());
}
use of com.yahoo.document.BucketId in project vespa by vespa-engine.
the class VisitorIteratorTestCase method testProgressSerializationExplicit.
/**
 * Exercises progress token serialization for an explicit bucket source, i.e. a
 * selection on three concrete user ids. Checks the textual form of the token while
 * buckets are pending/active, re-creates a token and iterator from that text, and
 * finally checks the textual form once all three buckets are finished.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testProgressSerializationExplicit() throws ParseException {
    int distBits = 16;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken progress = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.user == 1234 or id.user == 6789 or id.user == 8009", idFactory, distBits, progress);
    assertEquals(distBits, progress.getDistributionBitCount());
    assertTrue(iter.getBucketSource() instanceof VisitorIterator.ExplicitBucketSource);
    assertEquals(0, progress.getFinishedBucketCount());
    assertEquals(3, progress.getTotalBucketCount());
    assertEquals(3, progress.getPendingBucketCount());
    VisitorIterator.BucketProgress bp1 = iter.getNext();
    VisitorIterator.BucketProgress bp2 = iter.getNext();
    assertEquals(1, progress.getPendingBucketCount());
    assertEquals(2, progress.getActiveBucketCount());
    // Buckets are ordered by their reverse bucket id key
    assertEquals(new BucketId(32, 1234), bp1.getSuperbucket());
    assertEquals(new BucketId(), bp1.getProgress());
    // Put bucket 1234 back into pending
    iter.update(bp1.getSuperbucket(), new BucketId(36, 1234));
    assertEquals(2, progress.getPendingBucketCount());
    assertEquals(new BucketId(32, 8009), bp2.getSuperbucket());
    assertEquals(new BucketId(), bp2.getProgress());
    {
        StringBuilder desired = new StringBuilder();
        desired.append("VDS bucket progress file (").append(progress.percentFinished()).append("% completed)\n");
        desired.append(distBits);
        desired.append('\n');
        desired.append(0);
        desired.append('\n');
        desired.append(0);
        desired.append('\n');
        desired.append(3);
        desired.append('\n');
        // Pending/active buckets are written in an increasing (key, not
        // bucket-id!) order
        desired.append(Long.toHexString(new BucketId(32, 1234).getRawId()));
        desired.append(':');
        desired.append(Long.toHexString(new BucketId(36, 1234).getRawId()));
        desired.append('\n');
        desired.append(Long.toHexString(new BucketId(32, 8009).getRawId()));
        desired.append(":0\n");
        desired.append(Long.toHexString(new BucketId(32, 6789).getRawId()));
        desired.append(":0\n");
        assertEquals(desired.toString(), progress.toString());
        // Rebuild a token from the serialized text and resume with a fresh iterator
        ProgressToken prog2 = new ProgressToken(progress.toString());
        assertEquals(distBits, prog2.getDistributionBitCount());
        assertEquals(3, prog2.getTotalBucketCount());
        assertEquals(0, prog2.getFinishedBucketCount());
        VisitorIterator iter2 = VisitorIterator.createFromDocumentSelection("id.user == 1234 or id.user == 6789 or id.user == 8009", idFactory, distBits, prog2);
        assertEquals(3, prog2.getPendingBucketCount());
        assertFalse(prog2.hasActive());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        assertTrue(iter2.getBucketSource() instanceof VisitorIterator.ExplicitBucketSource);
        assertFalse(iter2.getBucketSource().hasNext());
        VisitorIterator.BucketProgress bp = iter2.getNext();
        assertEquals(new BucketId(32, 1234), bp.getSuperbucket());
        assertEquals(new BucketId(36, 1234), bp.getProgress());
        assertEquals(2, prog2.getPendingBucketCount());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        bp = iter2.getNext();
        assertEquals(new BucketId(32, 8009), bp.getSuperbucket());
        assertEquals(new BucketId(), bp.getProgress());
        assertEquals(1, prog2.getPendingBucketCount());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        bp = iter2.getNext();
        assertEquals(0, prog2.getPendingBucketCount());
        assertEquals(new BucketId(32, 6789), bp.getSuperbucket());
        assertEquals(new BucketId(), bp.getProgress());
        assertFalse(iter2.hasNext());
        // Active buckets
        assertFalse(iter2.isDone());
        assertEquals(3, prog2.getActiveBucketCount());
    }
    // Finish off all active buckets
    assertTrue(iter.hasNext());
    assertFalse(iter.isDone());
    bp1 = iter.getNext();
    assertEquals(new BucketId(32, 1234), bp1.getSuperbucket());
    assertEquals(new BucketId(36, 1234), bp1.getProgress());
    iter.update(bp1.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertTrue(iter.hasNext());
    assertFalse(iter.isDone());
    bp1 = iter.getNext();
    assertEquals(new BucketId(32, 6789), bp1.getSuperbucket());
    assertEquals(new BucketId(), bp1.getProgress());
    // Just to make sure Java serializes the long properly
    assertEquals("VDS bucket progress file (" + progress.percentFinished() + "% completed)\n" + "16\n" + "0\n" + "1\n" + "3\n" + "8000000000001f49:0\n" + "8000000000001a85:0\n", progress.toString());
    iter.update(bp1.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    // At this point, we've got one active but no pending, so hasNext == false,
    // but isDone is also == false
    assertFalse(iter.hasNext());
    assertFalse(iter.isDone());
    assertEquals(0, progress.getPendingBucketCount());
    assertEquals(1, progress.getActiveBucketCount());
    iter.update(bp2.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertFalse(iter.hasNext());
    assertTrue(iter.isDone());
    assertTrue(progress.isFinished());
    assertEquals(0, progress.getActiveBucketCount());
    {
        StringBuilder finished = new StringBuilder();
        finished.append("VDS bucket progress file (100.0% completed)\n");
        finished.append(distBits);
        finished.append('\n');
        // Cursor (not used by explicit)
        finished.append(0);
        finished.append('\n');
        // Finished
        finished.append(3);
        finished.append('\n');
        // Total
        finished.append(3);
        finished.append('\n');
        assertEquals(finished.toString(), progress.toString());
    }
}
use of com.yahoo.document.BucketId in project vespa by vespa-engine.
the class VisitorIteratorTestCase method testProgressSerializationRange.
/**
 * Exercises progress token serialization for a distribution-range bucket source
 * (a selection that covers the whole bucket space) using 4 distribution bits.
 * Checks the textual form of the token after half the buckets are finished, with
 * a mix of pending and active buckets, and after everything is finished; also
 * checks that a token re-created from text restores counts, cursor and bit count.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testProgressSerializationRange() throws ParseException {
    int distBits = 4;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken progress = new ProgressToken();
    // docsel will be unknown --> entire bucket range will be covered
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, distBits, progress);
    assertEquals(distBits, progress.getDistributionBitCount());
    assertTrue(iter.getBucketSource() instanceof VisitorIterator.DistributionRangeBucketSource);
    assertEquals(0, progress.getFinishedBucketCount());
    assertEquals(1 << distBits, progress.getTotalBucketCount());
    // First, get+update half of the buckets, marking them as done
    long bucketCount = 0;
    long bucketStop = 1 << (distBits - 1);
    while (iter.hasNext() && bucketCount != bucketStop) {
        VisitorIterator.BucketProgress ids = iter.getNext();
        iter.update(ids.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(bucketStop, bucketCount);
    // Should be no buckets in limbo at this point
    assertFalse(progress.hasActive());
    assertFalse(progress.hasPending());
    assertFalse(iter.isDone());
    assertTrue(iter.hasNext());
    assertEquals(bucketCount, progress.getFinishedBucketCount());
    assertFalse(progress.isFinished());
    StringBuilder desired = new StringBuilder();
    desired.append("VDS bucket progress file (50.0% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    // Finished == cursor for this
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(desired.toString(), progress.toString());
    // Test import, in which case distribution bits are 1
    BucketIdFactory idFactory2 = new BucketIdFactory();
    // De-serialization with no pending buckets
    {
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        assertFalse(progDs.hasPending());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        // The bit count stored in the token wins over the 1 passed to the factory method
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        // Iterator must start up on next bucket in range
        VisitorIterator.BucketProgress idDs = iterDs.getNext();
        long resumeKey = ProgressToken.makeNthBucketKey(bucketCount, distBits);
        assertEquals(new BucketId(ProgressToken.keyToBucketId(resumeKey)), idDs.getSuperbucket());
        assertEquals(new BucketId(), idDs.getProgress());
    }
    // Now fetch a subset of the remaining buckets without finishing them,
    // keeping some in the active set and some in pending
    int pendingTotal = 1 << (distBits - 3);
    int activeTotal = 1 << (distBits - 3);
    Vector<VisitorIterator.BucketProgress> buckets = new Vector<VisitorIterator.BucketProgress>();
    // Pre-fetch, since otherwise we'd reuse pending buckets
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        buckets.add(iter.getNext());
    }
    // NOTE(review): the first activeTotal buckets are the ones given partial progress
    // below (so presumably returned to pending), and the rest stay active. The names
    // look swapped, but both totals are equal, so the assertions hold either way.
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        VisitorIterator.BucketProgress idTemp = buckets.get(i);
        if (i < activeTotal) {
            // Make them 50% done
            iter.update(idTemp.getSuperbucket(), new BucketId(distBits + 2, idTemp.getSuperbucket().getId() | (2 << distBits)));
        }
        // else: leave hanging as active
    }
    assertEquals(activeTotal, progress.getActiveBucketCount());
    assertEquals(pendingTotal, progress.getPendingBucketCount());
    // we can't reuse the existing string builder, since the bucket cursor
    // has changed
    desired = new StringBuilder();
    desired.append("VDS bucket progress file (").append(progress.percentFinished()).append("% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    desired.append(bucketCount + pendingTotal + activeTotal);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(pendingTotal + activeTotal, progress.getBuckets().entrySet().size());
    // One "superbucket:progress" line per pending/active bucket, in the token's own iteration order
    for (Map.Entry<ProgressToken.BucketKeyWrapper, ProgressToken.BucketEntry> entry : progress.getBuckets().entrySet()) {
        desired.append(Long.toHexString(ProgressToken.keyToBucketId(entry.getKey().getKey())));
        desired.append(':');
        desired.append(Long.toHexString(entry.getValue().getProgress().getRawId()));
        desired.append('\n');
    }
    assertEquals(desired.toString(), progress.toString());
    {
        // Deserialization with pending buckets
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        // All started but nonfinished buckets get placed in pending upon
        // deserialization
        assertEquals(pendingTotal + activeTotal, progDs.getPendingBucketCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        assertEquals(bucketCount + pendingTotal + activeTotal, progDs.getBucketCursor());
    }
    // Finish all the active buckets
    for (int i = activeTotal; i < activeTotal + pendingTotal; ++i) {
        iter.update(buckets.get(i).getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(0, progress.getActiveBucketCount());
    boolean consistentNext = true;
    // Get all pending/remaining sourced and finish them all
    while (!iter.isDone()) {
        if (!iter.hasNext()) {
            // Not done but nothing to hand out: bail out rather than loop forever
            consistentNext = false;
            break;
        }
        VisitorIterator.BucketProgress bp = iter.getNext();
        iter.update(bp.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertTrue(consistentNext);
    assertFalse(iter.hasNext());
    assertTrue(progress.isFinished());
    // Cumulative number of finished buckets must match 2^distbits
    assertEquals(1 << distBits, bucketCount);
    StringBuilder finished = new StringBuilder();
    finished.append("VDS bucket progress file (100.0% completed)\n");
    finished.append(distBits);
    finished.append('\n');
    // Cursor
    finished.append(1 << distBits);
    finished.append('\n');
    // Finished
    finished.append(1 << distBits);
    finished.append('\n');
    // Total
    finished.append(1 << distBits);
    finished.append('\n');
    assertEquals(finished.toString(), progress.toString());
}
Aggregations