Search in sources :

Example 21 with ProgressToken

use of com.yahoo.documentapi.ProgressToken in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testRangeDistributionIncreaseMultipleBits.

/**
 * Checks what happens when the distribution bit count is raised from 16 to
 * 20 on a range-based iterator that has 3 finished, 2 pending and 1 active
 * bucket. While the active bucket is outstanding the progress token is
 * expected to keep reporting 16 bits; once it is finished the token is
 * expected to report 20 bits with cursor, pending and finished counts scaled
 * by 16.
 *
 * @throws ParseException propagated from the calls made by this test
 *         (presumably the document selection parsing)
 */
public void testRangeDistributionIncreaseMultipleBits() throws ParseException {
    int db = 16;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, db, p);
    // For this test, have 3 finished buckets, 2 pending and 1 active
    for (int i = 0; i < 3; ++i) {
        iter.update(iter.getNext().getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    }
    VisitorIterator.BucketProgress[] bpp = new VisitorIterator.BucketProgress[2];
    bpp[0] = iter.getNext();
    bpp[1] = iter.getNext();
    // Leave this hanging as active
    VisitorIterator.BucketProgress bpa = iter.getNext();
    // Updating with a non-finished progress leaves these two as pending
    // (see the pending count assertion below)
    iter.update(bpp[0].getSuperbucket(), new BucketId());
    iter.update(bpp[1].getSuperbucket(), new BucketId());
    iter.setDistributionBitCount(20);
    // ProgressToken doesn't change yet, since it had active buckets
    assertEquals(16, p.getDistributionBitCount());
    assertEquals(20, iter.getDistributionBitCount());
    assertEquals(20, iter.getBucketSource().getDistributionBitCount());
    assertFalse(iter.hasNext());
    assertFalse(iter.isDone());
    assertTrue(iter.getBucketSource().shouldYield());
    assertEquals(2, p.getPendingBucketCount());
    assertEquals(1, p.getActiveBucketCount());
    // Finish active, triggering the consistency fixes
    iter.update(bpa.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertEquals(20, p.getDistributionBitCount());
    // 2 pending buckets at db:16 become 2 * 16 = 32 at db:20
    assertEquals(32, p.getPendingBucketCount());
    assertEquals(0, p.getActiveBucketCount());
    // Each bucket with db:16 becomes equal to 16 buckets with db:20, so
    // the bucket space position must be 16 * 6 = 96
    assertEquals(96, p.getBucketCursor());
    // Each finished bucket also covers less ground, so count is upped
    // accordingly
    assertEquals(16 * 4, p.getFinishedBucketCount());
    // The 32 pending buckets must now be handed out in key order, starting
    // at the 48th bucket key (3 finished * 16), when using 20 distribution bits
    for (int i = 0; i < 32; ++i) {
        long testKey = ProgressToken.makeNthBucketKey(i + 48, 20);
        VisitorIterator.BucketProgress bp = iter.getNext();
        assertEquals(new BucketId(ProgressToken.keyToBucketId(testKey)), bp.getSuperbucket());
        iter.update(bp.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    }
    assertEquals(0, p.getPendingBucketCount());
    assertEquals(16 * 6, p.getFinishedBucketCount());
    // Bucket source should now begin returning from bucket 20:0x6000
    assertEquals(new BucketId(20, 0x6000), iter.getNext().getSuperbucket());
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketId(com.yahoo.document.BucketId) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 22 with ProgressToken

use of com.yahoo.documentapi.ProgressToken in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testProgressSerializationExplicit.

/**
 * Exercises serialization and deserialization of a {@code ProgressToken}
 * driven by an explicit bucket source (a selection of three
 * {@code id.user} terms): the textual form is compared against a
 * hand-built expectation, a second token is reconstructed from that text
 * and iterated, and finally the original iterator is run to completion.
 *
 * @throws ParseException propagated from the calls made by this test
 *         (presumably the document selection parsing)
 */
public void testProgressSerializationExplicit() throws ParseException {
    int distBits = 16;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken progress = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.user == 1234 or id.user == 6789 or id.user == 8009", idFactory, distBits, progress);
    assertEquals(distBits, progress.getDistributionBitCount());
    assertTrue(iter.getBucketSource() instanceof VisitorIterator.ExplicitBucketSource);
    assertEquals(0, progress.getFinishedBucketCount());
    assertEquals(3, progress.getTotalBucketCount());
    assertEquals(3, progress.getPendingBucketCount());
    VisitorIterator.BucketProgress bp1 = iter.getNext();
    VisitorIterator.BucketProgress bp2 = iter.getNext();
    assertEquals(1, progress.getPendingBucketCount());
    assertEquals(2, progress.getActiveBucketCount());
    // Buckets are ordered by their reverse bucket id key
    assertEquals(new BucketId(32, 1234), bp1.getSuperbucket());
    assertEquals(new BucketId(), bp1.getProgress());
    // Put bucket 1234 back into pending
    iter.update(bp1.getSuperbucket(), new BucketId(36, 1234));
    assertEquals(2, progress.getPendingBucketCount());
    assertEquals(new BucketId(32, 8009), bp2.getSuperbucket());
    assertEquals(new BucketId(), bp2.getProgress());
    {
        // Expected layout: header line, distribution bits, cursor,
        // finished count, total count, then one "bucket:progress" line
        // per unfinished bucket
        StringBuilder desired = new StringBuilder();
        desired.append("VDS bucket progress file (").append(progress.percentFinished()).append("% completed)\n");
        desired.append(distBits);
        desired.append('\n');
        desired.append(0);
        desired.append('\n');
        desired.append(0);
        desired.append('\n');
        desired.append(3);
        desired.append('\n');
        // Pending/active buckets are written in an increasing (key, not
        // bucket-id!) order
        desired.append(Long.toHexString(new BucketId(32, 1234).getRawId()));
        desired.append(':');
        desired.append(Long.toHexString(new BucketId(36, 1234).getRawId()));
        desired.append('\n');
        desired.append(Long.toHexString(new BucketId(32, 8009).getRawId()));
        desired.append(":0\n");
        desired.append(Long.toHexString(new BucketId(32, 6789).getRawId()));
        desired.append(":0\n");
        assertEquals(desired.toString(), progress.toString());
        // Round-trip: rebuild a token from the serialized text
        ProgressToken prog2 = new ProgressToken(progress.toString());
        assertEquals(distBits, prog2.getDistributionBitCount());
        assertEquals(3, prog2.getTotalBucketCount());
        assertEquals(0, prog2.getFinishedBucketCount());
        VisitorIterator iter2 = VisitorIterator.createFromDocumentSelection("id.user == 1234 or id.user == 6789 or id.user == 8009", idFactory, distBits, prog2);
        // All three unfinished buckets are pending in the imported token
        assertEquals(3, prog2.getPendingBucketCount());
        assertFalse(prog2.hasActive());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        assertTrue(iter2.getBucketSource() instanceof VisitorIterator.ExplicitBucketSource);
        assertFalse(iter2.getBucketSource().hasNext());
        VisitorIterator.BucketProgress bp = iter2.getNext();
        assertEquals(new BucketId(32, 1234), bp.getSuperbucket());
        assertEquals(new BucketId(36, 1234), bp.getProgress());
        assertEquals(2, prog2.getPendingBucketCount());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        bp = iter2.getNext();
        assertEquals(new BucketId(32, 8009), bp.getSuperbucket());
        assertEquals(new BucketId(), bp.getProgress());
        assertEquals(1, prog2.getPendingBucketCount());
        assertTrue(iter2.hasNext());
        assertFalse(iter2.isDone());
        bp = iter2.getNext();
        assertEquals(0, prog2.getPendingBucketCount());
        assertEquals(new BucketId(32, 6789), bp.getSuperbucket());
        assertEquals(new BucketId(), bp.getProgress());
        assertFalse(iter2.hasNext());
        // Not done yet: all three buckets are still active
        assertFalse(iter2.isDone());
        assertEquals(3, prog2.getActiveBucketCount());
    }
    // Finish off all active buckets
    assertTrue(iter.hasNext());
    assertFalse(iter.isDone());
    bp1 = iter.getNext();
    assertEquals(new BucketId(32, 1234), bp1.getSuperbucket());
    assertEquals(new BucketId(36, 1234), bp1.getProgress());
    iter.update(bp1.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertTrue(iter.hasNext());
    assertFalse(iter.isDone());
    bp1 = iter.getNext();
    assertEquals(new BucketId(32, 6789), bp1.getSuperbucket());
    assertEquals(new BucketId(), bp1.getProgress());
    // Just to make sure Java serializes the long properly
    assertEquals("VDS bucket progress file (" + progress.percentFinished() + "% completed)\n" + "16\n" + "0\n" + "1\n" + "3\n" + "8000000000001f49:0\n" + "8000000000001a85:0\n", progress.toString());
    iter.update(bp1.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    // At this point, we've got one active but no pending, so hasNext == false,
    // but isDone is also == false
    assertFalse(iter.hasNext());
    assertFalse(iter.isDone());
    assertEquals(0, progress.getPendingBucketCount());
    assertEquals(1, progress.getActiveBucketCount());
    iter.update(bp2.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertFalse(iter.hasNext());
    assertTrue(iter.isDone());
    assertTrue(progress.isFinished());
    assertEquals(0, progress.getActiveBucketCount());
    {
        StringBuilder finished = new StringBuilder();
        finished.append("VDS bucket progress file (100.0% completed)\n");
        finished.append(distBits);
        finished.append('\n');
        // Cursor (not used by explicit)
        finished.append(0);
        finished.append('\n');
        // Finished
        finished.append(3);
        finished.append('\n');
        // Total
        finished.append(3);
        finished.append('\n');
        assertEquals(finished.toString(), progress.toString());
    }
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketId(com.yahoo.document.BucketId) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 23 with ProgressToken

use of com.yahoo.documentapi.ProgressToken in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testImportInconsistentProgressDecrease.

/**
 * Imports a serialized progress token that records 7 distribution bits
 * together with a single unfinished bucket, then lowers the distribution
 * bit count to 6 and checks the resulting total, finished, pending and
 * active counts as well as the bucket cursor.
 *
 * @throws ParseException propagated from the calls made by this test
 *         (presumably the document selection parsing)
 */
public void testImportInconsistentProgressDecrease() throws ParseException {
    // Bucket progress "file" that upon time of changing from 4 to 7
    // distribution bits and writing the progress had an active bucket
    String input = "VDS bucket progress file\n" + "7\n" + "32\n" + "24\n" + "128\n" + "100000000000000c:0\n";
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken(input);
    // The iterator is created with a distribution bit count of 1, but is
    // expected to report the 7 bits recorded in the imported token
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, 1, p);
    assertEquals(7, iter.getDistributionBitCount());
    // Now we're at 6 distribution bits
    iter.setDistributionBitCount(6);
    assertEquals(6, iter.getDistributionBitCount());
    assertEquals(6, p.getDistributionBitCount());
    assertEquals(6, iter.getBucketSource().getDistributionBitCount());
    assertEquals(1 << 6, p.getTotalBucketCount());
    // Going down one bit halves the finished count
    assertEquals(24 >> 1, p.getFinishedBucketCount());
    // Split 4 -> 7 bits, merge 7 -> 6 bits
    assertEquals(1 << 2, p.getPendingBucketCount());
    assertEquals(0, p.getActiveBucketCount());
    assertEquals(24 / 2 + (1 << 2), p.getBucketCursor());
    assertTrue(iter.hasNext());
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 24 with ProgressToken

use of com.yahoo.documentapi.ProgressToken in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testExplicitDistributionBitDecrease.

/**
 * Checks that lowering the distribution bit count from 20 to 16 on an
 * iterator built from three explicit {@code id.user} terms updates the
 * reported bit count everywhere but leaves the pending, finished and total
 * bucket counts untouched.
 *
 * @throws ParseException propagated from the calls made by this test
 *         (presumably the document selection parsing)
 */
public void testExplicitDistributionBitDecrease() throws ParseException {
    int distBits = 20;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.user == 1234 or id.user == 6789 or id.user == 8009", idFactory, distBits, p);
    assertEquals(distBits, iter.getDistributionBitCount());
    assertEquals(distBits, p.getDistributionBitCount());
    assertEquals(distBits, iter.getBucketSource().getDistributionBitCount());
    // Finish one of the three buckets before changing the bit count
    iter.update(iter.getNext().getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    iter.setDistributionBitCount(16);
    assertEquals(16, iter.getDistributionBitCount());
    assertEquals(16, p.getDistributionBitCount());
    assertEquals(16, iter.getBucketSource().getDistributionBitCount());
    // Changing dist bits for explicit source should change nothing
    assertEquals(2, p.getPendingBucketCount());
    assertEquals(1, p.getFinishedBucketCount());
    assertEquals(3, p.getTotalBucketCount());
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 25 with ProgressToken

use of com.yahoo.documentapi.ProgressToken in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testProgressSerializationRange.

/**
 * Exercises serialization and deserialization of a {@code ProgressToken}
 * driven by a distribution range bucket source with 4 distribution bits.
 * The token's textual form is checked at three points: after half of the
 * buckets are finished, while a mix of pending and active buckets is
 * outstanding, and after every bucket has been finished. At the first two
 * points a fresh token is also rebuilt from the text and inspected.
 *
 * @throws ParseException propagated from the calls made by this test
 *         (presumably the document selection parsing)
 */
public void testProgressSerializationRange() throws ParseException {
    int distBits = 4;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken progress = new ProgressToken();
    // docsel will be unknown --> entire bucket range will be covered
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, distBits, progress);
    assertEquals(distBits, progress.getDistributionBitCount());
    assertTrue(iter.getBucketSource() instanceof VisitorIterator.DistributionRangeBucketSource);
    assertEquals(0, progress.getFinishedBucketCount());
    assertEquals(1 << distBits, progress.getTotalBucketCount());
    // First, get+update half of the buckets, marking them as done
    long bucketCount = 0;
    long bucketStop = 1 << (distBits - 1);
    while (iter.hasNext() && bucketCount != bucketStop) {
        VisitorIterator.BucketProgress ids = iter.getNext();
        iter.update(ids.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(bucketStop, bucketCount);
    // Should be no buckets in limbo at this point
    assertFalse(progress.hasActive());
    assertFalse(progress.hasPending());
    assertFalse(iter.isDone());
    assertTrue(iter.hasNext());
    assertEquals(bucketCount, progress.getFinishedBucketCount());
    assertFalse(progress.isFinished());
    // Expected layout: header line, distribution bits, cursor, finished
    // count, total count
    StringBuilder desired = new StringBuilder();
    desired.append("VDS bucket progress file (50.0% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    // Finished == cursor for this
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(desired.toString(), progress.toString());
    // Test import, in which case distribution bits are 1
    BucketIdFactory idFactory2 = new BucketIdFactory();
    // De-serialization with no pending buckets
    {
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        assertFalse(progDs.hasPending());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        // The bit count from the imported token wins over the 1 passed above
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        // Iterator must start up on next bucket in range
        VisitorIterator.BucketProgress idDs = iterDs.getNext();
        long resumeKey = ProgressToken.makeNthBucketKey(bucketCount, distBits);
        assertEquals(new BucketId(ProgressToken.keyToBucketId(resumeKey)), idDs.getSuperbucket());
        assertEquals(new BucketId(), idDs.getProgress());
    }
    // Now fetch a subset of the remaining buckets without finishing them,
    // keeping some in the active set and some in pending
    int pendingTotal = 1 << (distBits - 3);
    int activeTotal = 1 << (distBits - 3);
    Vector<VisitorIterator.BucketProgress> buckets = new Vector<VisitorIterator.BucketProgress>();
    // Pre-fetch, since otherwise we'd reuse pending buckets
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        buckets.add(iter.getNext());
    }
    // NOTE: the first activeTotal buckets get a partial-progress update and
    // the remaining pendingTotal ones stay active (they are the ones finished
    // as "active" further down). The names are therefore crossed, but the two
    // totals are equal so the count assertions hold either way.
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        VisitorIterator.BucketProgress idTemp = buckets.get(i);
        if (i < activeTotal) {
            // Make them 50% done
            iter.update(idTemp.getSuperbucket(), new BucketId(distBits + 2, idTemp.getSuperbucket().getId() | (2 << distBits)));
        }
        // else: leave hanging as active
    }
    assertEquals(activeTotal, progress.getActiveBucketCount());
    assertEquals(pendingTotal, progress.getPendingBucketCount());
    // we can't reuse the existing string builder, since the bucket cursor
    // has changed
    desired = new StringBuilder();
    desired.append("VDS bucket progress file (").append(progress.percentFinished()).append("% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    desired.append(bucketCount + pendingTotal + activeTotal);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(pendingTotal + activeTotal, progress.getBuckets().entrySet().size());
    // One "bucket:progress" line per unfinished bucket, in the iteration
    // order of the token's own bucket map
    for (Map.Entry<ProgressToken.BucketKeyWrapper, ProgressToken.BucketEntry> entry : progress.getBuckets().entrySet()) {
        desired.append(Long.toHexString(ProgressToken.keyToBucketId(entry.getKey().getKey())));
        desired.append(':');
        desired.append(Long.toHexString(entry.getValue().getProgress().getRawId()));
        desired.append('\n');
    }
    assertEquals(desired.toString(), progress.toString());
    {
        // Deserialization with pending buckets
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        // All started but nonfinished buckets get placed in pending upon
        // deserialization
        assertEquals(pendingTotal + activeTotal, progDs.getPendingBucketCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        assertEquals(bucketCount + pendingTotal + activeTotal, progDs.getBucketCursor());
    }
    // Finish all the active buckets
    for (int i = activeTotal; i < activeTotal + pendingTotal; ++i) {
        iter.update(buckets.get(i).getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(0, progress.getActiveBucketCount());
    boolean consistentNext = true;
    // Get all pending/remaining sourced and finish them all
    while (!iter.isDone()) {
        if (!iter.hasNext()) {
            // Not done but nothing to hand out: bail out instead of looping
            // forever and let the assertion below report it
            consistentNext = false;
            break;
        }
        VisitorIterator.BucketProgress bp = iter.getNext();
        iter.update(bp.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertTrue(consistentNext);
    assertFalse(iter.hasNext());
    assertTrue(progress.isFinished());
    // Cumulative number of finished buckets must match 2^distbits
    assertEquals(1 << distBits, bucketCount);
    StringBuilder finished = new StringBuilder();
    finished.append("VDS bucket progress file (100.0% completed)\n");
    finished.append(distBits);
    finished.append('\n');
    // Cursor
    finished.append(1 << distBits);
    finished.append('\n');
    // Finished
    finished.append(1 << distBits);
    finished.append('\n');
    // Total
    finished.append(1 << distBits);
    finished.append('\n');
    assertEquals(finished.toString(), progress.toString());
}
Also used : BucketId(com.yahoo.document.BucketId) VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken) Vector(java.util.Vector) Map(java.util.Map)

Aggregations

ProgressToken (com.yahoo.documentapi.ProgressToken): 35, BucketIdFactory (com.yahoo.document.BucketIdFactory): 26, VisitorIterator (com.yahoo.documentapi.VisitorIterator): 25, BucketId (com.yahoo.document.BucketId): 22, TreeSet (java.util.TreeSet): 3, Test (org.junit.Test): 3, ParseException (com.yahoo.document.select.parser.ParseException): 1, VisitorParameters (com.yahoo.documentapi.VisitorParameters): 1, Map (java.util.Map): 1, Vector (java.util.Vector): 1