use of org.apache.gobblin.publisher.TaskPublisher in project incubator-gobblin by apache.
the class Fork method checkDataQuality.
/**
 * Runs the task-level quality policies for this fork and asks the task publisher whether the
 * fork's output may be published.
 *
 * <p>Before the policies run, the extractor row counts (multi-branch case only), the number of
 * records written by the writer (zero when no writer is present) and the string form of the
 * schema (when present) are recorded on the fork task state. The per-branch written-record count
 * is also recorded on the task state.</p>
 *
 * @param schema optional schema; when present, {@code schema.get().toString()} is stored under
 *        {@link ConfigurationKeys#EXTRACT_SCHEMA}
 * @return {@code true} only when the publisher reports {@code SUCCESS}; {@code false} for any other
 *         publisher state, or when the quality check itself throws
 * @throws Exception declared by the signature; anything thrown while executing the policies or
 *         querying the publisher is caught, logged and turned into a {@code false} result
 */
private boolean checkDataQuality(Optional<Object> schema) throws Exception {
  final boolean multipleBranches = this.branches > 1;

  if (multipleBranches) {
    // Carry the extractor's row counts over from the task state to this fork's state.
    this.forkTaskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED,
        this.taskState.getProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED));
    this.forkTaskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED,
        this.taskState.getProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED));
  }

  // Branch-qualified key under which the written-record count is reported on the task state.
  String branchRecordsWrittenKey = ForkOperatorUtils.getPropertyNameForBranch(
      ConfigurationKeys.WRITER_RECORDS_WRITTEN, this.branches, this.index);

  if (!this.writer.isPresent()) {
    // No writer was ever created for this fork, so nothing was written.
    this.forkTaskState.setProp(ConfigurationKeys.WRITER_ROWS_WRITTEN, 0L);
    this.taskState.setProp(branchRecordsWrittenKey, 0L);
  } else {
    this.forkTaskState.setProp(ConfigurationKeys.WRITER_ROWS_WRITTEN, this.writer.get().recordsWritten());
    this.taskState.setProp(branchRecordsWrittenKey, this.writer.get().recordsWritten());
  }

  if (schema.isPresent()) {
    this.forkTaskState.setProp(ConfigurationKeys.EXTRACT_SCHEMA, schema.get().toString());
  }

  try {
    // With a single branch the policy checker is requested with index -1.
    int policyCheckerIndex = multipleBranches ? this.index : -1;
    TaskLevelPolicyCheckResults policyResults =
        this.taskContext.getTaskLevelPolicyChecker(this.forkTaskState, policyCheckerIndex).executePolicies();
    TaskPublisher taskPublisher = this.taskContext.getTaskPublisher(this.forkTaskState, policyResults);

    switch (taskPublisher.canPublish()) {
      case SUCCESS:
        return true;
      case CLEANUP_FAIL:
        this.logger.error("Cleanup failed for task " + this.taskId);
        return false;
      case POLICY_TESTS_FAIL:
        this.logger.error("Not all quality checking passed for task " + this.taskId);
        return false;
      case COMPONENTS_NOT_FINISHED:
        this.logger.error("Not all components completed for task " + this.taskId);
        return false;
      default:
        // Any other publisher state is treated as "do not publish" without a log line.
        return false;
    }
  } catch (Throwable t) {
    // Best effort: a failing quality check must not propagate, it only blocks publishing.
    this.logger.error("Failed to check task-level data quality", t);
    return false;
  }
}
use of org.apache.gobblin.publisher.TaskPublisher in project incubator-gobblin by apache.
the class TaskContinuousTest method testContinuousTaskOneRecord.
/**
 * Verifies that a streaming task behaves correctly when the extractor emits exactly one record:
 * committed watermarks stay valid while the task runs, the task shuts down within three seconds,
 * and the single record reaches the writer.
 *
 * @throws Exception if the task set-up, the sleeps or the executor shutdown fail
 */
@Test
public void testContinuousTaskOneRecord() throws Exception {
  final String testRecord = "hello";
  ArrayList<Object> collectedRecords = new ArrayList<>(100);
  OneRecordExtractor extractor = new OneRecordExtractor(testRecord);
  TaskContext taskContext = getMockTaskContext(collectedRecords, extractor);

  // Publisher mock that always allows publishing.
  TaskPublisher publisher = mock(TaskPublisher.class);
  when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);
  when(taskContext.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(publisher);

  TaskStateTracker stateTracker = mock(TaskStateTracker.class);

  // A real TaskExecutor is required so that the Fork runs in a separate thread.
  TaskExecutor executor = new TaskExecutor(new Properties());

  Task task = new Task(taskContext, stateTracker, executor, Optional.<CountDownLatch>absent());
  ScheduledExecutorService taskRunner =
      new ScheduledThreadPoolExecutor(1, ExecutorsUtils.newThreadFactory(Optional.of(log)));
  taskRunner.execute(task);

  WatermarkStorage watermarkStorage = taskContext.getWatermarkStorage();

  // Observe the running task for two seconds, checking committed watermarks once per second.
  final int secondsToObserve = 2;
  for (int second = 0; second < secondsToObserve; second++) {
    Thread.sleep(1000);
    Map<String, CheckpointableWatermark> committedSoFar =
        watermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("default"));
    if (committedSoFar.isEmpty()) {
      continue;
    }
    for (CheckpointableWatermark watermark : committedSoFar.values()) {
      log.info("Observed committed watermark: {}", watermark);
    }
    log.info("Task progress: {}", task.getProgress());
    // Watermarks only need to look reasonable while the task is still running.
    Assert.assertTrue(extractor.validateWatermarks(false, committedSoFar));
  }

  // Request an orderly shutdown and wait for it to complete.
  task.shutdown();
  log.info("Shutting down task now");
  boolean success = task.awaitShutdown(3000);
  Assert.assertTrue(success, "Task should shutdown in 3 seconds");
  log.info("Task done waiting to shutdown {}", success);

  // NOTE(review): this lookup asks for source "0" while the polling loop above asks for
  // "default" -- confirm the difference is intentional.
  Map<String, CheckpointableWatermark> committedAtShutdown =
      watermarkStorage.getCommittedWatermarks(CheckpointableWatermark.class, ImmutableList.of("0"));
  // After an orderly shutdown the committed watermarks must match the input rows exactly.
  Assert.assertTrue(extractor.validateWatermarks(true, committedAtShutdown));

  // The single record must have reached the writer unchanged.
  Assert.assertEquals(collectedRecords.size(), 1);
  Assert.assertEquals(collectedRecords.get(0), testRecord);
  task.commit();

  // Stop the executor that ran the task.
  taskRunner.shutdown();
  taskRunner.awaitTermination(100, TimeUnit.MILLISECONDS);
}
use of org.apache.gobblin.publisher.TaskPublisher in project incubator-gobblin by apache.
the class TestRecordStream method setupTask.
/**
 * Builds a spied {@link Task} backed by a mocked {@link TaskContext} that hands out the supplied
 * extractor, writer builder, converters and record stream processors.
 *
 * <p>The task state has the synchronous execution model switched off, the task publisher always
 * reports {@code SUCCESS}, and {@code submitTaskCommittedEvent()} is stubbed to do nothing.</p>
 *
 * @param extractor extractor returned by the mocked context
 * @param writer writer builder returned for every branch
 * @param converters converters returned by the mocked context
 * @param recordStreamProcessors record stream processors returned by the mocked context
 * @return a Mockito spy wrapping the real task
 * @throws Exception if the local file system or any collaborator cannot be created
 */
private Task setupTask(Extractor extractor, DataWriterBuilder writer, List<Converter<?, ?, ?, ?>> converters, List<RecordStreamProcessor<?, ?, ?, ?>> recordStreamProcessors) throws Exception {
  TaskState state = getEmptyTestTaskState("testRetryTaskId");
  state.setProp(ConfigurationKeys.TASK_SYNCHRONOUS_EXECUTION_MODEL_KEY, false);

  // Two separate row-level checkers: one for the no-arg accessor, one for the per-branch accessor.
  RowLevelPolicyChecker defaultRowChecker =
      new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration()));
  RowLevelPolicyChecker branchRowChecker =
      new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration()));

  // Publisher mock that always allows publishing.
  TaskPublisher publisher = mock(TaskPublisher.class);
  when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

  TaskContext context = mock(TaskContext.class);
  when(context.getExtractor()).thenReturn(extractor);
  when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
  when(context.getTaskState()).thenReturn(state);
  when(context.getConverters()).thenReturn(converters);
  when(context.getRecordStreamProcessors()).thenReturn(recordStreamProcessors);
  when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt()))
      .thenReturn(mock(TaskLevelPolicyChecker.class));
  when(context.getRowLevelPolicyChecker()).thenReturn(defaultRowChecker);
  when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(branchRowChecker);
  when(context.getDataWriterBuilder(anyInt(), anyInt())).thenReturn(writer);
  when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(publisher);

  TaskStateTracker stateTracker = mock(TaskStateTracker.class);

  // A real TaskExecutor is required so that the Fork runs in a separate thread.
  TaskExecutor executor = new TaskExecutor(new Properties());

  Task spiedTask = spy(new Task(context, stateTracker, executor, Optional.<CountDownLatch>absent()));
  doNothing().when(spiedTask).submitTaskCommittedEvent();
  return spiedTask;
}
use of org.apache.gobblin.publisher.TaskPublisher in project incubator-gobblin by apache.
the class TaskContinuousTest method getMockTaskContext.
/**
 * Creates a mocked {@link TaskContext} for a streaming task.
 *
 * <p>The context serves the given extractor (also as the raw source extractor), a
 * {@code MockWatermarkStorage}, an {@code IdentityForkOperator}, a publisher that always reports
 * {@code SUCCESS}, and a {@code TestStreamingDataWriterBuilder} writing into
 * {@code recordCollector}.</p>
 *
 * @param recordCollector list handed to the streaming writer builder
 * @param mockExtractor extractor returned by the context
 * @return the configured mock context
 * @throws Exception if the local file system or task state cannot be created
 */
private TaskContext getMockTaskContext(ArrayList<Object> recordCollector, Extractor mockExtractor) throws Exception {
  TaskState state = getStreamingTaskState();

  // A real checker with an empty policy list, shared by both row-level accessors.
  RowLevelPolicyChecker rowChecker =
      new RowLevelPolicyChecker(Lists.newArrayList(), "stateId", FileSystem.getLocal(new Configuration()));
  WatermarkStorage watermarkStorage = new MockWatermarkStorage();

  // Publisher mock that always allows publishing.
  TaskPublisher publisher = mock(TaskPublisher.class);
  when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

  TaskMetrics metrics = TaskMetrics.get(state);

  TaskContext context = mock(TaskContext.class);
  when(context.getTaskMetrics()).thenReturn(metrics);
  when(context.getExtractor()).thenReturn(mockExtractor);
  when(context.getRawSourceExtractor()).thenReturn(mockExtractor);
  when(context.getWatermarkStorage()).thenReturn(watermarkStorage);
  when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
  when(context.getTaskState()).thenReturn(state);
  when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(publisher);
  when(context.getRowLevelPolicyChecker()).thenReturn(rowChecker);
  when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(rowChecker);
  when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt()))
      .thenReturn(mock(TaskLevelPolicyChecker.class));
  when(context.getDataWriterBuilder(anyInt(), anyInt()))
      .thenReturn(new TestStreamingDataWriterBuilder(recordCollector));
  return context;
}
use of org.apache.gobblin.publisher.TaskPublisher in project incubator-gobblin by apache.
the class TaskTest method getMockTaskContext.
/**
 * Creates a mocked {@link TaskContext} with one record-collecting writer per fork.
 *
 * <p>The number of forks equals {@code writerCollectors.size()}; fork {@code i} gets a
 * {@code RecordCollectingWriterBuilder} backed by {@code writerCollectors.get(i)}. The row-level
 * policy checker is a spy whose {@code executePolicies} is stubbed to return {@code true} and
 * whose {@code getFinalState} is stubbed to return a fresh {@code State}. The publisher always
 * reports {@code SUCCESS}.</p>
 *
 * @param taskState task state returned by the context
 * @param mockExtractor extractor returned by the context (also as the raw source extractor)
 * @param writerCollectors one collector list per fork
 * @param mockForkOperator fork operator returned by the context
 * @return the configured mock context
 * @throws Exception if the local file system or the policy checker cannot be created
 */
private TaskContext getMockTaskContext(TaskState taskState, Extractor mockExtractor, ArrayList<ArrayList<Object>> writerCollectors, ForkOperator mockForkOperator) throws Exception {
  final int forkCount = writerCollectors.size();

  // Spy on a real checker (empty policy list) so that only selected calls are stubbed.
  RowLevelPolicyChecker rowChecker =
      spy(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
  when(rowChecker.executePolicies(any(Object.class), any(RowLevelPolicyCheckResults.class))).thenReturn(true);
  when(rowChecker.getFinalState()).thenReturn(new State());

  // Publisher mock that always allows publishing.
  TaskPublisher publisher = mock(TaskPublisher.class);
  when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

  TaskContext context = mock(TaskContext.class);
  when(context.getExtractor()).thenReturn(mockExtractor);
  when(context.getRawSourceExtractor()).thenReturn(mockExtractor);
  when(context.getForkOperator()).thenReturn(mockForkOperator);
  when(context.getTaskState()).thenReturn(taskState);
  when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(publisher);
  when(context.getRowLevelPolicyChecker()).thenReturn(rowChecker);
  when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(rowChecker);
  when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt()))
      .thenReturn(mock(TaskLevelPolicyChecker.class));

  // Register one collecting writer builder per fork, keyed by (fork count, fork index).
  int forkIndex = 0;
  for (ArrayList<Object> collector : writerCollectors) {
    when(context.getDataWriterBuilder(forkCount, forkIndex))
        .thenReturn(new RecordCollectingWriterBuilder(collector));
    forkIndex++;
  }
  return context;
}
Aggregations