Search in sources :

Example 11 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class FineGrainedWatermarkTrackerTest method verifyCommitables.

/**
 * Asserts that the tracker's view of the "default" source matches the given set of holes
 * (watermark values that were never acknowledged).
 *
 * <ul>
 *   <li>Unacknowledged watermarks: empty when there are no holes, otherwise a single entry
 *       whose value equals the first (smallest) hole.</li>
 *   <li>Committable watermarks: empty when watermark 0 is itself a hole, otherwise a single
 *       entry whose value is {@code maxWatermark} (no holes) or the first hole minus one.</li>
 * </ul>
 *
 * @param tracker the tracker under test
 * @param holes sorted watermark values for which no ack was fired
 * @param maxWatermark the highest watermark value that was tracked
 */
private static void verifyCommitables(FineGrainedWatermarkTracker tracker, SortedSet<Integer> holes, long maxWatermark) {
    final boolean noHoles = holes.isEmpty();

    // Unacknowledged side: at most one entry, pointing at the first hole.
    Map<String, CheckpointableWatermark> pending = tracker.getUnacknowledgedWatermarks();
    Assert.assertEquals(pending.size(), noHoles ? 0 : 1);
    if (!noHoles) {
        CheckpointableWatermark firstPending = pending.get("default");
        Assert.assertEquals(((LongWatermark) firstPending.getWatermark()).getValue(), (long) holes.first());
    }

    // Committable side: nothing can be committed if the very first record was never acked.
    Map<String, CheckpointableWatermark> ready = tracker.getCommittableWatermarks();
    if (holes.contains(0)) {
        Assert.assertEquals(ready.size(), 0);
        return;
    }
    Assert.assertEquals(ready.size(), 1);
    CheckpointableWatermark highestReady = ready.get("default");
    // Everything strictly below the first hole is committable; with no holes, everything is.
    long expectedValue = noHoles ? maxWatermark : holes.first() - 1;
    Assert.assertEquals(((LongWatermark) highestReady.getWatermark()).getValue(), expectedValue);
}
Also used : CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark)

Example 12 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class FineGrainedWatermarkTrackerTest method testWatermarkTracker.

/**
 * Single-threaded randomized test of the tracker's bookkeeping.
 * Each of the 100 rounds tracks a random number of watermarks for the "default" source,
 * leaves a random subset unacknowledged ("holes"), acks the rest, and then checks the
 * committable / unacknowledged watermarks both before and after a sweep.
 */
@Test
public static void testWatermarkTracker() {
    Random rng = new Random();
    Config emptyConfig = ConfigFactory.empty();
    int round = 0;
    while (round < 100) {
        FineGrainedWatermarkTracker tracker = new FineGrainedWatermarkTracker(emptyConfig);
        int total = 1 + rng.nextInt(1000);

        // Track one watermark per value in [0, total).
        AcknowledgableWatermark[] tracked = new AcknowledgableWatermark[total];
        for (int value = 0; value < total; value++) {
            CheckpointableWatermark wrapped = new DefaultCheckpointableWatermark("default", new LongWatermark(value));
            tracked[value] = new AcknowledgableWatermark(wrapped);
            tracker.track(tracked[value]);
        }

        // Pick random positions that will never be acknowledged; duplicates collapse in the set.
        int draws = rng.nextInt(total);
        SortedSet<Integer> holes = new TreeSet<>();
        for (int remaining = draws; remaining > 0; remaining--) {
            holes.add(rng.nextInt(total));
        }

        // Acknowledge everything that is not a hole.
        for (int value = 0; value < total; value++) {
            if (holes.contains(value)) {
                continue;
            }
            tracked[value].ack();
        }

        int highestTracked = total - 1;
        verifyCommitables(tracker, holes, highestTracked);
        // Sweeping must not change what is reported as committable or unacknowledged.
        tracker.sweep();
        verifyCommitables(tracker, holes, highestTracked);
        round++;
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Random(java.util.Random) Config(com.typesafe.config.Config) TreeSet(java.util.TreeSet) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 13 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class MultiWriterWatermarkManagerTest method testFailingWatermarkStorage.

/**
 * Verifies that when every commit to watermark storage fails, the manager keeps retrying
 * on each interval and exposes the most recent failure through its commit status.
 *
 * @throws IOException declared because the mocked storage's commit method declares it
 * @throws InterruptedException if the test thread is interrupted while waiting for commits
 */
@Test
public void testFailingWatermarkStorage() throws IOException, InterruptedException {
    // Storage whose commit always throws the same, identifiable exception.
    IOException plantedFailure = new IOException("Failed to write coz the programmer told me to");
    WatermarkStorage failingStorage = mock(WatermarkStorage.class);
    doThrow(plantedFailure).when(failingStorage).commitWatermarks(any(Iterable.class));

    long commitInterval = 1000;
    MultiWriterWatermarkManager manager =
        new MultiWriterWatermarkManager(failingStorage, commitInterval, Optional.<Logger>absent());

    // A writer that always reports a single committable watermark for the "default" source.
    WatermarkAwareWriter writer = mock(WatermarkAwareWriter.class);
    CheckpointableWatermark committable = new DefaultCheckpointableWatermark("default", new LongWatermark(0));
    when(writer.getCommittableWatermark()).thenReturn(Collections.singletonMap("default", committable));
    manager.registerWriter(writer);

    try {
        manager.start();
    } catch (Exception e) {
        Assert.fail("Should not throw exception", e);
    }

    // Wait for two and a half commit intervals before shutting down.
    long waitMillis = commitInterval * 2 + (commitInterval / 2);
    Thread.sleep(waitMillis);
    manager.close();

    // Two interval-driven attempts plus one more triggered by close().
    int minimumAttempts = 3;
    verify(failingStorage, atLeast(minimumAttempts)).commitWatermarks(any(Iterable.class));
    Assert.assertEquals(manager.getCommitStatus().getLastCommitException(), plantedFailure, "Testing tracking of failed exceptions");
}
Also used : IOException(java.io.IOException) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 14 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class MultiWriterWatermarkTracker method getCommittableWatermark.

/**
 * Returns the highest watermark of {@code source} that can be committed: the last candidate
 * (in iteration order) that compares strictly below the first unacknowledged watermark, or
 * the last candidate overall when nothing is unacknowledged.
 *
 * <p>NOTE(review): "highest" and "minimum" here rely on both per-source collections iterating
 * in ascending watermark order — confirm against the collection types used by the fields.
 *
 * @param source the watermark source to look up
 * @return the committable watermark, or {@code Optional.absent()} when the source has no
 *         candidates (including a source that was never seen) or none lies below the first
 *         unacknowledged watermark
 */
public Optional<CheckpointableWatermark> getCommittableWatermark(String source) {
    Set<CheckpointableWatermark> unacked = unacknowledgedWatermarks.get(source);
    CheckpointableWatermark minUnacknowledgedWatermark =
        (unacked == null || unacked.isEmpty()) ? null : unacked.iterator().next();

    // A source with no recorded candidates yields null here; treat it as "nothing to commit"
    // instead of failing with a NullPointerException in the loop below.
    Iterable<? extends CheckpointableWatermark> candidates = candidateCommittables.get(source);
    if (candidates == null) {
        return Optional.absent();
    }

    CheckpointableWatermark highestCommitableWatermark = null;
    for (CheckpointableWatermark commitableWatermark : candidates) {
        if ((minUnacknowledgedWatermark == null) || (commitableWatermark.compareTo(minUnacknowledgedWatermark) < 0)) {
            // commitableWatermark < minUnacknowledgedWatermark
            highestCommitableWatermark = commitableWatermark;
        }
    }
    return Optional.fromNullable(highestCommitableWatermark);
}
Also used : CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark)

Example 15 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class StateStoreBasedWatermarkStorage method commitWatermarks.

/**
 * Writes every supplied watermark to the state store, one entry per watermark, using the
 * watermark's source as the table name under this storage's store name.
 *
 * @param watermarks the watermarks to persist
 * @throws IOException declared by this method; NOTE(review): presumably propagated from the
 *         state store's put — confirm against the state store API
 */
@Override
public void commitWatermarks(Iterable<CheckpointableWatermark> watermarks) throws IOException {
    for (CheckpointableWatermark current : watermarks) {
        // The source name doubles as the table name for the persisted state.
        _stateStore.put(_storeName, current.getSource(), new CheckpointableWatermarkState(current, GSON));
    }
}
Also used : CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark)

Aggregations

CheckpointableWatermark (org.apache.gobblin.source.extractor.CheckpointableWatermark)15 DefaultCheckpointableWatermark (org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark)11 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)9 Test (org.testng.annotations.Test)9 RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Properties (java.util.Properties)3 Random (java.util.Random)3 TreeSet (java.util.TreeSet)3 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)3 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 HashMap (java.util.HashMap)2 WatermarkStorage (org.apache.gobblin.writer.WatermarkStorage)2 Config (com.typesafe.config.Config)1 List (java.util.List)1 Schema (org.apache.avro.Schema)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 State (org.apache.gobblin.configuration.State)1