Use of org.apache.gobblin.writer.WatermarkStorage in project incubator-gobblin by Apache.
From the class KafkaSimpleStreamingTest, method testExtractor.
/**
 * Exercises {@code KafkaSimpleStreamingExtractor} against a freshly provisioned test topic.
 *
 * <p>The test proceeds in three steps:
 * <ol>
 *   <li>A first record is published and read back through an extractor that was started with an
 *       empty committed-watermark map.</li>
 *   <li>A second record is published and read back through the same extractor, to check that
 *       successive reads return successive records.</li>
 *   <li>The watermark of the second envelope is stored as committed, a third record is published,
 *       and a new extractor is started against the same (mocked) watermark storage. The first
 *       record it returns must be the third one, i.e. reading resumes after the commit.</li>
 * </ol>
 *
 * @throws IOException declared by the extractor API used here
 * @throws InterruptedException declared by the extractor API used here
 * @throws DataRecordException declared by the extractor API used here
 */
@Test(timeOut = 10000)
public void testExtractor() throws IOException, InterruptedException, DataRecordException {
  final String topic = "testSimpleStreamingExtractor";
  _kafkaTestHelper.provisionTopic(topic);

  Properties props = new Properties();
  props.put("bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
  props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");

  final byte[] record_1 = { 0, 1, 3 };
  final byte[] record_2 = { 2, 4, 6 };
  final byte[] record_3 = { 5, 7, 9 };

  // try-with-resources: the producer is closed even when one of the assertions below fails.
  try (Producer<String, byte[]> producer = new KafkaProducer<>(props)) {
    // Write a sample record to the topic
    producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_1));
    producer.flush();

    KafkaSimpleStreamingExtractor<String, byte[]> kSSE = getStreamingExtractor(topic);

    // The mock hands back this map for every getCommittedWatermarks call, so entries added
    // later in the test are visible to any extractor started afterwards.
    Map<String, CheckpointableWatermark> committedWatermarks = new HashMap<>();
    WatermarkStorage mockWatermarkStorage = mock(WatermarkStorage.class);
    when(mockWatermarkStorage.getCommittedWatermarks(any(Class.class), any(Iterable.class)))
        .thenReturn(committedWatermarks);
    kSSE.start(mockWatermarkStorage);

    // Read and verify that the record matches the one we just wrote
    RecordEnvelope<byte[]> record = kSSE.readRecordEnvelope();
    Assert.assertEquals(record.getRecord(), record_1);

    // Write a second record
    producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_2));
    producer.flush();

    // Read the second record using the same extractor and verify it matches what is expected
    record = kSSE.readRecordEnvelope();
    Assert.assertEquals(record.getRecord(), record_2);

    // Commit the watermark of the second record
    committedWatermarks.put(record.getWatermark().getSource(), record.getWatermark());

    // Write a third record
    producer.send(new ProducerRecord<String, byte[]>(topic, topic, record_3));
    producer.flush();

    // Recreate the extractor to force a seek
    kSSE = getStreamingExtractor(topic);
    kSSE.start(mockWatermarkStorage);
    record = kSSE.readRecordEnvelope();

    // Check that it matches the data written after the commit
    Assert.assertEquals(record.getRecord(), record_3);
  }
}
Use of org.apache.gobblin.writer.WatermarkStorage in project incubator-gobblin by Apache.
From the class TaskContinuousTest, method testContinuousTaskOneRecord.
/**
 * Test that a streaming task works correctly when the extractor produces only one record.
 *
 * <p>The task is run on a dedicated single-thread executor for two one-second intervals. After
 * each interval the committed watermarks are inspected and, when any are present, validated
 * against the extractor. The task is then shut down and the test asserts that the shutdown
 * completed within 3 seconds, that the final watermarks validate, and that exactly the one test
 * record reached the writer.
 *
 * <p>The task and the executor are released in a {@code finally} block so that a failing
 * assertion does not leave the task running on a live worker thread.
 *
 * @throws Exception if task setup, execution or shutdown fails
 */
@Test
public void testContinuousTaskOneRecord() throws Exception {
  ArrayList<Object> recordCollector = new ArrayList<>(100);
  String testRecord = "hello";
  OneRecordExtractor oneRecordExtractor = new OneRecordExtractor(testRecord);

  TaskContext mockTaskContext = getMockTaskContext(recordCollector, oneRecordExtractor);

  // Create a mock TaskPublisher
  TaskPublisher mockTaskPublisher = mock(TaskPublisher.class);
  when(mockTaskPublisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);
  when(mockTaskContext.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(mockTaskPublisher);

  // Create a mock TaskStateTracker
  TaskStateTracker mockTaskStateTracker = mock(TaskStateTracker.class);

  // Create a TaskExecutor - a real TaskExecutor must be created so a Fork is run in a separate thread
  TaskExecutor taskExecutor = new TaskExecutor(new Properties());

  // Create the Task
  Task task = new Task(mockTaskContext, mockTaskStateTracker, taskExecutor, Optional.<CountDownLatch>absent());

  ScheduledExecutorService taskRunner =
      new ScheduledThreadPoolExecutor(1, ExecutorsUtils.newThreadFactory(Optional.of(log)));
  taskRunner.execute(task);

  // Tracks whether the orderly shutdown below was reached, so the finally block
  // does not ask the task to shut down a second time.
  boolean taskShutdownRequested = false;
  try {
    WatermarkStorage mockWatermarkStorage = mockTaskContext.getWatermarkStorage();

    // Let the task run for 2 seconds, checking the committed watermarks once per second
    int sleepIterations = 2;
    for (int iteration = 0; iteration < sleepIterations; iteration++) {
      Thread.sleep(1000);
      Map<String, CheckpointableWatermark> observedWatermarks =
          mockWatermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("default"));
      if (!observedWatermarks.isEmpty()) {
        for (CheckpointableWatermark watermark : observedWatermarks.values()) {
          log.info("Observed committed watermark: {}", watermark);
        }
        log.info("Task progress: {}", task.getProgress());
        // Ensure that watermarks seem reasonable at each step
        Assert.assertTrue(oneRecordExtractor.validateWatermarks(false, observedWatermarks));
      }
    }

    // Let's try to shutdown the task
    taskShutdownRequested = true;
    task.shutdown();
    log.info("Shutting down task now");
    boolean success = task.awaitShutdown(3000);
    Assert.assertTrue(success, "Task should shutdown in 3 seconds");
    log.info("Task done waiting to shutdown {}", success);

    Map<String, CheckpointableWatermark> externalWatermarkStorage =
        mockWatermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("0"));

    // Ensure that committed watermarks match exactly the input rows because we shutdown in an orderly manner.
    Assert.assertTrue(oneRecordExtractor.validateWatermarks(true, externalWatermarkStorage));

    // Ensure that the record made it to the writer correctly
    Assert.assertEquals(recordCollector.size(), 1);
    Assert.assertEquals(recordCollector.get(0), testRecord);

    task.commit();
  } finally {
    // A failed assertion skips the orderly shutdown above; stop the task here in that case.
    if (!taskShutdownRequested) {
      task.shutdown();
    }
    // Shutdown the executor
    taskRunner.shutdown();
    taskRunner.awaitTermination(100, TimeUnit.MILLISECONDS);
  }
}
Use of org.apache.gobblin.writer.WatermarkStorage in project incubator-gobblin by Apache.
From the class TaskContinuousTest, method getMockTaskContext.
/**
 * Builds a Mockito mock of {@link TaskContext} whose collaborators are stubbed for a streaming
 * task test.
 *
 * <p>The returned context hands out the supplied extractor (both as the extractor and as the raw
 * source extractor), a {@code MockWatermarkStorage}, an {@code IdentityForkOperator}, a publisher
 * mock that reports {@code SUCCESS}, and a writer builder backed by {@code recordCollector}.
 *
 * @param recordCollector list passed to the {@code TestStreamingDataWriterBuilder}
 * @param mockExtractor extractor returned by the context
 * @return the stubbed task context
 * @throws Exception if building the task state or the local file system fails
 */
private TaskContext getMockTaskContext(ArrayList<Object> recordCollector, Extractor mockExtractor) throws Exception {
  TaskState streamingState = getStreamingTaskState();

  // Row-level checker built with an empty policy list on the local file system
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  RowLevelPolicyChecker rowPolicyChecker = new RowLevelPolicyChecker(Lists.newArrayList(), "stateId", localFs);

  // Publisher mock that always reports it can publish
  TaskPublisher publisher = mock(TaskPublisher.class);
  when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

  WatermarkStorage watermarkStorage = new MockWatermarkStorage();
  TaskMetrics taskMetrics = TaskMetrics.get(streamingState);
  TaskLevelPolicyChecker taskPolicyChecker = mock(TaskLevelPolicyChecker.class);

  TaskContext context = mock(TaskContext.class);

  // Task state and metrics
  when(context.getTaskState()).thenReturn(streamingState);
  when(context.getTaskMetrics()).thenReturn(taskMetrics);

  // Source side: the same extractor serves both accessors
  when(context.getExtractor()).thenReturn(mockExtractor);
  when(context.getRawSourceExtractor()).thenReturn(mockExtractor);
  when(context.getWatermarkStorage()).thenReturn(watermarkStorage);

  // Fork, policy checks, writer and publisher
  when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
  when(context.getRowLevelPolicyChecker()).thenReturn(rowPolicyChecker);
  when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(rowPolicyChecker);
  when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(taskPolicyChecker);
  when(context.getDataWriterBuilder(anyInt(), anyInt()))
      .thenReturn(new TestStreamingDataWriterBuilder(recordCollector));
  when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(publisher);

  return context;
}
Aggregations