Search in sources :

Example 6 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class StateStoreWatermarkStorageTest method testPersistWatermarkStateToZk.

/**
 * Commits one watermark through a {@link StateStoreBasedWatermarkStorage} configured with the
 * "zk" storage type, reads it back, and checks that exactly one entry with the same value is
 * returned.
 *
 * @throws IOException declared because committing and reading watermarks may throw it
 */
@Test
public void testPersistWatermarkStateToZk() throws IOException {
    final String sourceName = "source";
    CheckpointableWatermark expected = new DefaultCheckpointableWatermark(sourceName, new LongWatermark(startTime));

    // Job identity for the task state
    TaskState state = new TaskState();
    state.setJobId(TEST_JOB_ID);
    state.setProp(ConfigurationKeys.JOB_NAME_KEY, "JobName-" + startTime);

    // Watermark storage configuration: storage type plus the connect string of the test server
    String connectStringKey = StateStoreBasedWatermarkStorage.WATERMARK_STORAGE_CONFIG_PREFIX
        + ZkStateStoreConfigurationKeys.STATE_STORE_ZK_CONNECT_STRING_KEY;
    state.setProp(StateStoreBasedWatermarkStorage.WATERMARK_STORAGE_TYPE_KEY, "zk");
    state.setProp(connectStringKey, testingServer.getConnectString());

    // Round trip: commit the watermark, then fetch it again by its source name
    StateStoreBasedWatermarkStorage storage = new StateStoreBasedWatermarkStorage(state);
    storage.commitWatermarks(ImmutableList.of(expected));
    Map<String, CheckpointableWatermark> committed =
        storage.getCommittedWatermarks(DefaultCheckpointableWatermark.class, ImmutableList.of(sourceName));

    Assert.assertEquals(committed.size(), 1);
    LongWatermark stored = (LongWatermark) committed.get(sourceName).getWatermark();
    Assert.assertEquals(stored.getValue(), startTime);
}
Also used : DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 7 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class StateStoreBasedWatermarkStorage method getCommittedWatermarks.

/**
 * Collects the committed watermark of every requested source partition that has stored state.
 *
 * <p>For each partition the state store is queried using the partition name as both store-level
 * identifiers; when a state is found, the property stored under the partition name is
 * deserialized from JSON into {@code watermarkClass}. Partitions without stored state are
 * omitted from the result. A warning is logged when nothing at all was found.
 *
 * @param watermarkClass concrete watermark type to deserialize into
 * @param sourcePartitions names of the partitions to look up
 * @return map from partition name to its deserialized watermark; possibly empty, never null
 * @throws IOException declared by the signature; presumably raised by the state store lookup
 */
@Override
public Map<String, CheckpointableWatermark> getCommittedWatermarks(Class<? extends CheckpointableWatermark> watermarkClass, Iterable<String> sourcePartitions) throws IOException {
    Map<String, CheckpointableWatermark> found = new HashMap<>();
    for (String partition : sourcePartitions) {
        CheckpointableWatermarkState storedState = _stateStore.get(_storeName, partition, partition);
        if (storedState == null) {
            // Nothing committed for this partition yet
            continue;
        }
        found.put(partition, GSON.fromJson(storedState.getProp(partition), watermarkClass));
    }
    if (found.isEmpty()) {
        log.warn("Didn't find any committed watermarks");
    }
    return found;
}
Also used : HashMap(java.util.HashMap) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark)

Example 8 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class TaskContinuousTest method testContinuousTaskOneRecord.

/**
 * Verifies that a streaming task works correctly when the extractor produces only one record.
 *
 * <p>The task is handed to a single-threaded executor, polled once per second for committed
 * watermarks, then shut down; afterwards the committed watermarks and the collected record are
 * validated.
 *
 * @throws Exception if the task, the watermark storage or the executor fails
 */
@Test
public void testContinuousTaskOneRecord() throws Exception {
    String testRecord = "hello";
    ArrayList<Object> recordCollector = new ArrayList<>(100);
    OneRecordExtractor oneRecordExtractor = new OneRecordExtractor(testRecord);
    TaskContext mockTaskContext = getMockTaskContext(recordCollector, oneRecordExtractor);

    // Mock publisher that always reports success
    TaskPublisher publisher = mock(TaskPublisher.class);
    when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);
    when(mockTaskContext.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class))).thenReturn(publisher);

    // Mock tracker; the executor is real so that a Fork is run in a separate thread
    TaskStateTracker stateTracker = mock(TaskStateTracker.class);
    TaskExecutor executor = new TaskExecutor(new Properties());

    // Build the task and start it on its own runner thread
    Task task = new Task(mockTaskContext, stateTracker, executor, Optional.<CountDownLatch>absent());
    ScheduledExecutorService taskRunner = new ScheduledThreadPoolExecutor(1, ExecutorsUtils.newThreadFactory(Optional.of(log)));
    taskRunner.execute(task);

    WatermarkStorage watermarkStorage = mockTaskContext.getWatermarkStorage();

    // Let the task run for 2 seconds, checking the committed watermarks after each second
    final int pollCount = 2;
    for (int poll = 0; poll < pollCount; poll++) {
        Thread.sleep(1000);
        Map<String, CheckpointableWatermark> observed =
            watermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("default"));
        if (observed.isEmpty()) {
            continue;
        }
        for (CheckpointableWatermark committedWatermark : observed.values()) {
            log.info("Observed committed watermark: {}", committedWatermark);
        }
        log.info("Task progress: {}", task.getProgress());
        // Intermediate watermarks only need to look reasonable, not exact
        Assert.assertTrue(oneRecordExtractor.validateWatermarks(false, observed));
    }

    // Request an orderly shutdown and wait for it to complete
    task.shutdown();
    log.info("Shutting down task now");
    boolean success = task.awaitShutdown(3000);
    Assert.assertTrue(success, "Task should shutdown in 3 seconds");
    log.info("Task done waiting to shutdown {}", success);

    // NOTE(review): the polling loop above asks for partition "default" while this final lookup
    // asks for "0" — confirm which partition name the extractor actually commits under.
    Map<String, CheckpointableWatermark> finalWatermarks =
        watermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("0"));
    // After an orderly shutdown the committed watermarks must match the input rows exactly
    Assert.assertTrue(oneRecordExtractor.validateWatermarks(true, finalWatermarks));

    // The single record must have reached the writer unchanged
    Assert.assertEquals(recordCollector.size(), 1);
    Assert.assertEquals(recordCollector.get(0), testRecord);
    task.commit();

    // Stop the runner thread
    taskRunner.shutdown();
    taskRunner.awaitTermination(100, TimeUnit.MILLISECONDS);
}
Also used : WatermarkStorage(org.apache.gobblin.writer.WatermarkStorage) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) ArrayList(java.util.ArrayList) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) Properties(java.util.Properties) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) Test(org.testng.annotations.Test)

Example 9 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class TaskContinuousTest method testContinuousTask.

/**
 * Test that a streaming task will work correctly when an extractor is continuously producing records
 * No converters
 * Identity fork
 * One writer
 *
 * <p>The task runs on a single-threaded executor for ten one-second polling rounds; each round the
 * committed watermarks are validated loosely. After shutdown they are validated exactly.
 *
 * @throws Exception if the task, the watermark storage or the executor fails
 */
@Test
public void testContinuousTask() throws Exception {
    // Create a TaskState
    // NOTE(review): taskState is never read in this method — confirm whether the call is needed
    // for side effects; otherwise the local can be removed.
    TaskState taskState = getStreamingTaskState();
    ArrayList<Object> recordCollector = new ArrayList<>(100);
    // 1 second per record
    long perRecordExtractLatencyMillis = 1000;
    ContinuousExtractor continuousExtractor = new ContinuousExtractor(perRecordExtractLatencyMillis);
    TaskContext mockTaskContext = getMockTaskContext(recordCollector, continuousExtractor);
    // Create a mock TaskStateTracker
    TaskStateTracker mockTaskStateTracker = mock(TaskStateTracker.class);
    // Create a TaskExecutor - a real TaskExecutor must be created so a Fork is run in a separate thread
    TaskExecutor taskExecutor = new TaskExecutor(new Properties());
    // Create the Task
    Task task = new Task(mockTaskContext, mockTaskStateTracker, taskExecutor, Optional.<CountDownLatch>absent());
    ScheduledExecutorService taskRunner = new ScheduledThreadPoolExecutor(1, ExecutorsUtils.newThreadFactory(Optional.of(log)));
    taskRunner.execute(task);
    // Let the task run for 10 seconds
    int sleepIterations = 10;
    int currentIteration = 0;
    while (currentIteration < sleepIterations) {
        Thread.sleep(1000);
        currentIteration++;
        Map<String, CheckpointableWatermark> externalWatermarkStorage = mockTaskContext.getWatermarkStorage().getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("default"));
        if (!externalWatermarkStorage.isEmpty()) {
            for (CheckpointableWatermark watermark : externalWatermarkStorage.values()) {
                log.info("Observed committed watermark: {}", watermark);
            }
            log.info("Task progress: {}", task.getProgress());
            // Ensure that watermarks seem reasonable at each step
            Assert.assertTrue(continuousExtractor.validateWatermarks(false, externalWatermarkStorage));
        }
    }
    // Let's try to shutdown the task
    task.shutdown();
    log.info("Shutting down task now");
    // Wait up to 30000 ms; the failure message below must state the same deadline
    boolean success = task.awaitShutdown(30000);
    Assert.assertTrue(success, "Task should shutdown in 30 seconds");
    log.info("Task done waiting to shutdown {}", success);
    // Ensure that committed watermarks match exactly the input rows because we shutdown in an orderly manner.
    Assert.assertTrue(continuousExtractor.validateWatermarks(true, mockTaskContext.getWatermarkStorage().getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("default"))));
    task.commit();
    // Shutdown the executor
    taskRunner.shutdown();
    taskRunner.awaitTermination(100, TimeUnit.MILLISECONDS);
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) Test(org.testng.annotations.Test)

Example 10 with CheckpointableWatermark

use of org.apache.gobblin.source.extractor.CheckpointableWatermark in project incubator-gobblin by apache.

the class ConsoleWriterTest method writeEnvelope.

/**
 * Writes {@code content} through {@code consoleWriter} inside a record envelope that carries an
 * acknowledgable watermark, then asserts that the watermark was acked.
 *
 * @param consoleWriter writer under test
 * @param content record payload placed in the envelope
 * @param source source name given to the watermark
 * @param value long value given to the watermark
 * @throws IOException declared because writing the envelope may throw it
 */
private void writeEnvelope(ConsoleWriter consoleWriter, String content, String source, long value) throws IOException {
    CheckpointableWatermark checkpoint = new DefaultCheckpointableWatermark(source, new LongWatermark(value));
    AcknowledgableWatermark ackTracker = new AcknowledgableWatermark(checkpoint);

    // Register the tracker as a callback on the envelope
    RecordEnvelope<String> envelope =
        (RecordEnvelope<String>) new RecordEnvelope<>(content).addCallBack(ackTracker);
    consoleWriter.writeEnvelope(envelope);

    boolean acked = ackTracker.isAcked();
    Assert.assertTrue(acked);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark)

Aggregations

CheckpointableWatermark (org.apache.gobblin.source.extractor.CheckpointableWatermark)15 DefaultCheckpointableWatermark (org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark)11 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)9 Test (org.testng.annotations.Test)9 RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Properties (java.util.Properties)3 Random (java.util.Random)3 TreeSet (java.util.TreeSet)3 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)3 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 HashMap (java.util.HashMap)2 WatermarkStorage (org.apache.gobblin.writer.WatermarkStorage)2 Config (com.typesafe.config.Config)1 List (java.util.List)1 Schema (org.apache.avro.Schema)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 State (org.apache.gobblin.configuration.State)1