Search in sources :

Example 26 with BucketIdFactory

use of com.yahoo.document.BucketIdFactory in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testProgressSerializationRange.

/**
 * Verifies that a {@link ProgressToken} fed by a distribution-range bucket
 * source serializes to the expected text, and can be re-imported from that
 * text, at three points of a visiting session: half of the buckets finished
 * with nothing outstanding, a mix of active and pending buckets, and
 * everything finished.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testProgressSerializationRange() throws ParseException {
    int distBits = 4;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken progress = new ProgressToken();
    // docsel will be unknown --> entire bucket range will be covered
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, distBits, progress);
    assertEquals(distBits, progress.getDistributionBitCount());
    assertTrue(iter.getBucketSource() instanceof VisitorIterator.DistributionRangeBucketSource);
    assertEquals(0, progress.getFinishedBucketCount());
    assertEquals(1 << distBits, progress.getTotalBucketCount());
    // First, get+update half of the buckets, marking them as done
    long bucketCount = 0;
    long bucketStop = 1 << (distBits - 1);
    while (iter.hasNext() && bucketCount != bucketStop) {
        VisitorIterator.BucketProgress ids = iter.getNext();
        iter.update(ids.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(bucketStop, bucketCount);
    // Should be no buckets in limbo at this point
    assertFalse(progress.hasActive());
    assertFalse(progress.hasPending());
    assertFalse(iter.isDone());
    assertTrue(iter.hasNext());
    assertEquals(bucketCount, progress.getFinishedBucketCount());
    assertFalse(progress.isFinished());
    // Expected serialized form: header, distribution bits, cursor, finished, total
    StringBuilder desired = new StringBuilder();
    desired.append("VDS bucket progress file (50.0% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    // Finished == cursor for this
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(desired.toString(), progress.toString());
    // Test import, in which case distribution bits are 1
    BucketIdFactory idFactory2 = new BucketIdFactory();
    // De-serialization with no pending buckets
    {
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        assertFalse(progDs.hasPending());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        // The bit count stored in the token wins over the 1 passed to the factory method
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        // Iterator must start up on next bucket in range
        VisitorIterator.BucketProgress idDs = iterDs.getNext();
        long resumeKey = ProgressToken.makeNthBucketKey(bucketCount, distBits);
        assertEquals(new BucketId(ProgressToken.keyToBucketId(resumeKey)), idDs.getSuperbucket());
        assertEquals(new BucketId(), idDs.getProgress());
    }
    // Now fetch a subset of the remaining buckets without finishing them,
    // keeping some in the active set and some in pending
    int pendingTotal = 1 << (distBits - 3);
    int activeTotal = 1 << (distBits - 3);
    Vector<VisitorIterator.BucketProgress> buckets = new Vector<VisitorIterator.BucketProgress>();
    // Pre-fetch, since otherwise we'd reuse pending buckets
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        buckets.add(iter.getNext());
    }
    for (int i = 0; i < pendingTotal + activeTotal; ++i) {
        VisitorIterator.BucketProgress idTemp = buckets.get(i);
        // The first pendingTotal buckets are reported back with partial
        // progress, which is what the pending count asserted below tracks
        if (i < pendingTotal) {
            // Make them 50% done
            iter.update(idTemp.getSuperbucket(), new BucketId(distBits + 2, idTemp.getSuperbucket().getId() | (2 << distBits)));
        }
        // else: leave hanging as active
    }
    assertEquals(activeTotal, progress.getActiveBucketCount());
    assertEquals(pendingTotal, progress.getPendingBucketCount());
    // we can't reuse the existing string builder, since the bucket cursor
    // has changed
    desired = new StringBuilder();
    desired.append("VDS bucket progress file (").append(progress.percentFinished()).append("% completed)\n");
    desired.append(distBits);
    desired.append('\n');
    desired.append(bucketCount + pendingTotal + activeTotal);
    desired.append('\n');
    desired.append(bucketCount);
    desired.append('\n');
    desired.append(1 << distBits);
    desired.append('\n');
    assertEquals(pendingTotal + activeTotal, progress.getBuckets().entrySet().size());
    // Every outstanding bucket is expected as one "<bucket id hex>:<progress hex>" line
    for (Map.Entry<ProgressToken.BucketKeyWrapper, ProgressToken.BucketEntry> entry : progress.getBuckets().entrySet()) {
        desired.append(Long.toHexString(ProgressToken.keyToBucketId(entry.getKey().getKey())));
        desired.append(':');
        desired.append(Long.toHexString(entry.getValue().getProgress().getRawId()));
        desired.append('\n');
    }
    assertEquals(desired.toString(), progress.toString());
    {
        // Deserialization with pending buckets
        ProgressToken progDs = new ProgressToken(progress.toString());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(1 << distBits, progDs.getTotalBucketCount());
        assertEquals(bucketCount, progDs.getFinishedBucketCount());
        VisitorIterator iterDs = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, progDs);
        // All started but nonfinished buckets get placed in pending upon
        // deserialization
        assertEquals(pendingTotal + activeTotal, progDs.getPendingBucketCount());
        assertEquals(distBits, progDs.getDistributionBitCount());
        assertEquals(distBits, iterDs.getDistributionBitCount());
        assertFalse(progDs.hasActive());
        assertTrue(iterDs.hasNext());
        assertFalse(iterDs.isDone());
        assertEquals(bucketCount + pendingTotal + activeTotal, progDs.getBucketCursor());
    }
    // Finish all the active buckets, i.e. the ones that were pre-fetched
    // but never updated above
    for (int i = pendingTotal; i < pendingTotal + activeTotal; ++i) {
        iter.update(buckets.get(i).getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertEquals(0, progress.getActiveBucketCount());
    boolean consistentNext = true;
    // Get all pending/remaining sourced and finish them all
    while (!iter.isDone()) {
        // A not-done iterator with nothing to hand out would loop forever; bail out and fail below
        if (!iter.hasNext()) {
            consistentNext = false;
            break;
        }
        VisitorIterator.BucketProgress bp = iter.getNext();
        iter.update(bp.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
        ++bucketCount;
    }
    assertTrue(consistentNext);
    assertFalse(iter.hasNext());
    assertTrue(progress.isFinished());
    // Cumulative number of finished buckets must match 2^distbits
    assertEquals(1 << distBits, bucketCount);
    StringBuilder finished = new StringBuilder();
    finished.append("VDS bucket progress file (100.0% completed)\n");
    finished.append(distBits);
    finished.append('\n');
    // Cursor
    finished.append(1 << distBits);
    finished.append('\n');
    // Finished
    finished.append(1 << distBits);
    finished.append('\n');
    // Total
    finished.append(1 << distBits);
    finished.append('\n');
    assertEquals(finished.toString(), progress.toString());
}
Also used : BucketId(com.yahoo.document.BucketId) VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken) Vector(java.util.Vector) Map(java.util.Map)

Example 27 with BucketIdFactory

use of com.yahoo.document.BucketIdFactory in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testIsBucketFinished.

/**
 * Checks {@code ProgressToken.isBucketFinished} against a 4-bit range
 * iterator while the first two superbuckets are fetched and finished one
 * after the other.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testIsBucketFinished() throws ParseException {
    BucketIdFactory factory = new BucketIdFactory();
    ProgressToken token = new ProgressToken();
    VisitorIterator visitor = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", factory, 4, token);
    // Raw id whose low four bits are 1000, i.e. the second bucket finished below
    final int secondRawId = 1 << 3;

    // Nothing has been handed out yet
    assertFalse(token.isBucketFinished(new BucketId(32, 0)));

    // Finish superbucket 0x0000
    VisitorIterator.BucketProgress first = visitor.getNext();
    visitor.update(first.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertTrue(token.isBucketFinished(new BucketId(32, 0)));

    // Cursor is 1, but bucket 0x1000 not yet returned
    assertFalse(token.isBucketFinished(new BucketId(32, secondRawId)));
    VisitorIterator.BucketProgress second = visitor.getNext();
    // Cursor 2, 0x1000 returned but is contained in state, so not finished
    assertFalse(token.isBucketFinished(new BucketId(32, secondRawId)));
    visitor.update(second.getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertTrue(token.isBucketFinished(new BucketId(32, secondRawId)));

    // Only superbucket part is used: ids ending in ...0000 and ...1000 map
    // onto the two finished superbuckets
    assertTrue(token.isBucketFinished(new BucketId(32, 0x12345670)));
    assertTrue(token.isBucketFinished(new BucketId(32, 0x12345678)));
    // ...whereas ids ending in ...0001 and ...1001 do not
    assertFalse(token.isBucketFinished(new BucketId(32, 0x12345671)));
    assertFalse(token.isBucketFinished(new BucketId(32, 0x12345679)));
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketId(com.yahoo.document.BucketId) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 28 with BucketIdFactory

use of com.yahoo.document.BucketIdFactory in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testExceptionOnWrongDocumentSelection.

/**
 * Checks that creating an iterator from a progress token that does not fit
 * the given document selection is rejected with an
 * {@link IllegalArgumentException}, in both directions.
 *
 * @throws ParseException if a document selection string cannot be parsed
 */
public void testExceptionOnWrongDocumentSelection() throws ParseException {
    BucketIdFactory factory = new BucketIdFactory();
    // The progress files do not carry the original document selection, so
    // a "wrong" selection cannot be detected as such. The check is best
    // effort only: the total bucket counts must match up, and the bucket
    // cursor must not be set for explicit sources.

    // Case 1: pass a known document selection to an unknown docsel iterator
    String firstSerialized = "VDS bucket progress file\n16\n3\n1\n3\n" + "8000000000001f49:0\n8000000000001a85:0\n";
    boolean firstRejected = false;
    try {
        ProgressToken token = new ProgressToken(firstSerialized);
        VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", factory, 16, token);
    } catch (IllegalArgumentException expected) {
        firstRejected = true;
    }
    assertTrue(firstRejected);

    // Case 2: now try it the other way around
    String secondSerialized = "VDS bucket progress file\n" + "10\n" + "503\n" + "500\n" + "1024\n" + "28000000000000be:0\n" + "28000000000002be:0\n" + "28000000000001be:0\n";
    boolean secondRejected = false;
    try {
        ProgressToken token = new ProgressToken(secondSerialized);
        VisitorIterator.createFromDocumentSelection("id.group=\"yahoo.com\" or id.user=555", factory, 16, token);
    } catch (IllegalArgumentException expected) {
        secondRejected = true;
    }
    assertTrue(secondRejected);
}
Also used : BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 29 with BucketIdFactory

use of com.yahoo.document.BucketIdFactory in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testRangeDistributionBitDecrease1.

/**
 * Lowers the distribution bit count by one while buckets are finished,
 * pending and active, and checks that the progress token keeps its old bit
 * count until the last active bucket has been updated, after which the
 * token's bit count, bucket counts and cursor reflect the reduced range.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testRangeDistributionBitDecrease1() throws ParseException {
    int db = 16;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, db, p);
    VisitorIterator.DistributionRangeBucketSource src = (VisitorIterator.DistributionRangeBucketSource) iter.getBucketSource();
    assertTrue(src.isLosslessResetPossible());
    // Finish the first 3 buckets; a lossless reset is no longer possible afterwards
    for (int i = 0; i < 3; ++i) {
        iter.update(iter.getNext().getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    }
    assertFalse(src.isLosslessResetPossible());
    // Fetch 6 more buckets up front; they are handed back as pending further down
    VisitorIterator.BucketProgress[] bpp = new VisitorIterator.BucketProgress[6];
    for (int i = 0; i < 6; ++i) {
        bpp[i] = iter.getNext();
    }
    // Leave this hanging as active
    VisitorIterator.BucketProgress bpa = iter.getNext();
    for (int i = 0; i < 6; ++i) {
        iter.update(bpp[i].getSuperbucket(), new BucketId());
    }
    // 3 finished + 6 pending + 1 active
    assertEquals(10, p.getBucketCursor());
    iter.setDistributionBitCount(db - 1);
    assertEquals(db - 1, iter.getDistributionBitCount());
    assertEquals(db, p.getDistributionBitCount());
    assertEquals(db - 1, iter.getBucketSource().getDistributionBitCount());
    // The iterator is waiting patiently for all active buckets to be updated,
    // at which point it will perform the merging and actually update the
    // progress token's distribution bit count
    assertTrue(iter.getBucketSource().shouldYield());
    assertFalse(iter.hasNext());
    assertFalse(iter.isDone());
    assertEquals(1, p.getActiveBucketCount());
    iter.update(bpa.getSuperbucket(), new BucketId());
    assertEquals(db - 1, p.getDistributionBitCount());
    assertEquals(0, p.getActiveBucketCount());
    // 3 merges, P PP PP PP -> P P P P
    assertEquals(4, p.getPendingBucketCount());
    assertEquals(1, p.getFinishedBucketCount());
    assertEquals(5, p.getBucketCursor());
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketId(com.yahoo.document.BucketId) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Example 30 with BucketIdFactory

use of com.yahoo.document.BucketIdFactory in project vespa by vespa-engine.

the class VisitorIteratorTestCase method testRangeDistributionBitChangeWithoutDone.

/**
 * Test that intermittent changes in distribution are handled properly, e.g.
 * changing from 11 -> 9 with X active and then before all those are flushed,
 * the distribution goes up to 12.
 *
 * Also checks that a token serialized while the change is still outstanding
 * can be imported and then brought to the new bit count.
 *
 * @throws ParseException if the document selection string cannot be parsed
 */
public void testRangeDistributionBitChangeWithoutDone() throws ParseException {
    int db = 11;
    BucketIdFactory idFactory = new BucketIdFactory();
    ProgressToken p = new ProgressToken();
    VisitorIterator iter = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory, db, p);
    VisitorIterator.DistributionRangeBucketSource src = (VisitorIterator.DistributionRangeBucketSource) iter.getBucketSource();
    VisitorIterator.BucketProgress[] bpp = new VisitorIterator.BucketProgress[4];
    for (int i = 0; i < 4; ++i) {
        bpp[i] = iter.getNext();
    }
    // Hand the first two back without progress; the last two stay active
    for (int i = 0; i < 2; ++i) {
        iter.update(bpp[i].getSuperbucket(), new BucketId());
    }
    assertFalse(src.isLosslessResetPossible());
    // Now 2 pending, 2 active
    iter.setDistributionBitCount(9);
    assertEquals(11, p.getDistributionBitCount());
    assertEquals(2, p.getActiveBucketCount());
    assertEquals(2, p.getPendingBucketCount());
    assertTrue(iter.getBucketSource().shouldYield());
    // Update as pending, still with old count since there's 1 more active
    // with bpp[2]. Have progress so that lossless reset isn't possible
    iter.update(bpp[3].getSuperbucket(), new BucketId(15, bpp[3].getSuperbucket().getId()));
    iter.setDistributionBitCount(12);
    assertEquals(1, p.getActiveBucketCount());
    assertEquals(3, p.getPendingBucketCount());
    assertTrue(iter.getBucketSource().shouldYield());
    // Serialize before token is updated to 12 bits
    String serialized = p.toString();
    iter.update(bpp[2].getSuperbucket(), ProgressToken.FINISHED_BUCKET);
    assertEquals(0, p.getActiveBucketCount());
    // The 3 pending buckets are at db=11, so they should be split once each
    assertEquals(3 * 2, p.getPendingBucketCount());
    assertFalse(iter.getBucketSource().shouldYield());
    assertEquals(2, p.getFinishedBucketCount());
    // Ensure we get a consistent progress token imported
    ProgressToken p2 = new ProgressToken(serialized);
    // Not yet updated
    assertEquals(11, p2.getDistributionBitCount());
    BucketIdFactory idFactory2 = new BucketIdFactory();
    VisitorIterator iter2 = VisitorIterator.createFromDocumentSelection("id.group != \"yahoo.com\"", idFactory2, 1, p2);
    // Not yet updated, since we don't trust the initial BucketIdFactory
    assertEquals(11, iter2.getDistributionBitCount());
    assertEquals(11, p2.getDistributionBitCount());
    iter2.setDistributionBitCount(12);
    // Now it has been updated
    assertEquals(12, p2.getDistributionBitCount());
    assertEquals(8, p2.getPendingBucketCount());
    assertEquals(8, p2.getBucketCursor());
    assertEquals(0, p2.getFinishedBucketCount());
}
Also used : VisitorIterator(com.yahoo.documentapi.VisitorIterator) BucketId(com.yahoo.document.BucketId) BucketIdFactory(com.yahoo.document.BucketIdFactory) ProgressToken(com.yahoo.documentapi.ProgressToken)

Aggregations

BucketIdFactory (com.yahoo.document.BucketIdFactory)35 ProgressToken (com.yahoo.documentapi.ProgressToken)26 BucketId (com.yahoo.document.BucketId)25 VisitorIterator (com.yahoo.documentapi.VisitorIterator)25 DocumentId (com.yahoo.document.DocumentId)4 TreeSet (java.util.TreeSet)2 Map (java.util.Map)1 Vector (java.util.Vector)1