Use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.
In class AsyncConverter1to1Test, the method test1to1:
@Test
public void test1to1() throws Exception {
  MyAsyncConverter1to1 converter = new MyAsyncConverter1to1();
  List<Throwable> errors = Lists.newArrayList();
  AtomicBoolean done = new AtomicBoolean(false);
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(AsyncConverter1to1.MAX_CONCURRENT_ASYNC_CONVERSIONS_KEY, 3);
  RecordStreamWithMetadata<String, String> stream =
      new RecordStreamWithMetadata<>(Flowable.range(0, 5).map(i -> i.toString()).map(RecordEnvelope::new),
          GlobalMetadata.<String>builder().schema("schema").build());
  Set<String> outputRecords = Sets.newConcurrentHashSet();
  converter.processStream(stream, workUnitState).getRecordStream()
      .subscribeOn(Schedulers.newThread())
      .subscribe(r -> outputRecords.add(((RecordEnvelope<String>) r).getRecord()), errors::add, () -> done.set(true));
  // Release record 0
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("0")).await());
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("0")).await());
  Assert.assertEquals(outputRecords.size(), 1);
  // Record 4 should not be in the queue yet (max concurrent conversions is 3).
  Assert.assertFalse(ExponentialBackoff.awaitCondition().maxWait(200L).callable(() -> converter.completeFutureIfPresent("4")).await());
  // Release record 3 (out of order)
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("3")).await());
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("3")).await());
  // only two records have been released
  Assert.assertEquals(outputRecords.size(), 2);
  // Release record 4 (now in queue)
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("4")).await());
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("4")).await());
  Assert.assertEquals(outputRecords.size(), 3);
  // Release records 1 and 2
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("1")).await());
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("2")).await());
  Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.size() == 5).await());
  Assert.assertEquals(outputRecords, Sets.newHashSet("0", "1", "2", "3", "4"));
  Assert.assertTrue(errors.isEmpty());
  Assert.assertTrue(done.get());
}
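The test above relies on MyAsyncConverter1to1 parking each conversion behind a future that the test completes by hand via completeFutureIfPresent. A rough, self-contained sketch of such a converter follows; the convertRecordAsync/convertSchema signatures, the import packages, and the ConcurrentHashMap bookkeeping are assumptions for illustration, not code copied from the project.

import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.converter.AsyncConverter1to1;

// Hypothetical sketch of a test converter in the spirit of MyAsyncConverter1to1.
// The AsyncConverter1to1 extension point is assumed to be an async record conversion
// method returning a CompletableFuture; exact names/signatures may differ in the project.
public class MyAsyncConverter1to1Sketch extends AsyncConverter1to1<String, String, String, String> {

  // Pending conversions, keyed by the input record, so a test can release them one by one.
  private final Map<String, CompletableFuture<String>> futures = new ConcurrentHashMap<>();

  @Override
  public String convertSchema(String inputSchema, WorkUnitState workUnit) {
    // Identity schema conversion is enough for the test.
    return inputSchema;
  }

  @Override
  public CompletableFuture<String> convertRecordAsync(String outputSchema, String inputRecord,
      WorkUnitState workUnit) {
    // Park the conversion until the test explicitly completes it.
    CompletableFuture<String> future = new CompletableFuture<>();
    this.futures.put(inputRecord, future);
    return future;
  }

  // Test hook: complete the pending future for this record, if one exists.
  public boolean completeFutureIfPresent(String key) {
    CompletableFuture<String> future = this.futures.remove(key);
    if (future == null) {
      return false;
    }
    future.complete(key);
    return true;
  }
}

With a converter like this, the assertions in test1to1 follow directly: a record appears in outputRecords only after its future is completed, and at most MAX_CONCURRENT_ASYNC_CONVERSIONS_KEY futures are pending at any time.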
Use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.
In class TestRecordStream, the method testAcks:
@Test
public void testAcks() throws Exception {
  StreamEntity[] entities = new StreamEntity[] { new RecordEnvelope<>("a"), new BasicTestControlMessage("1"),
      new RecordEnvelope<>("b"), new BasicTestControlMessage("2") };

  BasicAckableForTesting ackable = new BasicAckableForTesting();
  for (int i = 0; i < entities.length; i++) {
    entities[i].addCallBack(ackable);
  }

  MyExtractor extractor = new MyExtractor(entities);
  MyConverter converter = new MyConverter();
  MyDataWriter writer = new MyDataWriter();

  // Create a TaskState
  TaskState taskState = getEmptyTestTaskState("testRetryTaskId");
  taskState.setProp(ConfigurationKeys.TASK_SYNCHRONOUS_EXECUTION_MODEL_KEY, false);

  // Create a mock TaskContext
  TaskContext mockTaskContext = mock(TaskContext.class);
  when(mockTaskContext.getExtractor()).thenReturn(extractor);
  when(mockTaskContext.getForkOperator()).thenReturn(new IdentityForkOperator());
  when(mockTaskContext.getTaskState()).thenReturn(taskState);
  when(mockTaskContext.getConverters()).thenReturn(Lists.newArrayList(converter));
  when(mockTaskContext.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(mock(TaskLevelPolicyChecker.class));
  when(mockTaskContext.getRowLevelPolicyChecker()).thenReturn(
      new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
  when(mockTaskContext.getRowLevelPolicyChecker(anyInt())).thenReturn(
      new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
  when(mockTaskContext.getDataWriterBuilder(anyInt(), anyInt())).thenReturn(writer);

  // Create a mock TaskPublisher
  TaskPublisher mockTaskPublisher = mock(TaskPublisher.class);
  when(mockTaskPublisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);
  when(mockTaskContext.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class)))
      .thenReturn(mockTaskPublisher);

  // Create a mock TaskStateTracker
  TaskStateTracker mockTaskStateTracker = mock(TaskStateTracker.class);

  // Create a TaskExecutor - a real TaskExecutor must be created so a Fork is run in a separate thread
  TaskExecutor taskExecutor = new TaskExecutor(new Properties());

  // Create the Task
  Task realTask = new Task(mockTaskContext, mockTaskStateTracker, taskExecutor, Optional.<CountDownLatch>absent());
  Task task = spy(realTask);
  doNothing().when(task).submitTaskCommittedEvent();

  task.run();
  task.commit();

  Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  Assert.assertEquals(ackable.acked, 4);
}
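The final assertion expects every StreamEntity in the pipeline, two RecordEnvelopes and two ControlMessages, to acknowledge its callback exactly once after the task commits. A minimal sketch of an ack-counting callback in the spirit of BasicAckableForTesting is shown below; the org.apache.gobblin.ack.Ackable package and the nack(Throwable) method name are assumptions, not copied from the project.

import java.util.concurrent.atomic.AtomicInteger;

import org.apache.gobblin.ack.Ackable;

// Hypothetical ack-counting callback for tests.
public class CountingAckable implements Ackable {

  // Number of entities that were fully processed, and number that failed.
  public final AtomicInteger acked = new AtomicInteger();
  public final AtomicInteger nacked = new AtomicInteger();

  @Override
  public void ack() {
    // Called once per StreamEntity that made it through the pipeline.
    this.acked.incrementAndGet();
  }

  @Override
  public void nack(Throwable error) {
    // Called if processing of the entity failed.
    this.nacked.incrementAndGet();
  }
}

Attached via addCallBack as in the test above, a callback like this would be expected to report acked.get() == 4 once both records and both control messages have flowed through the writer.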
Use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.
In class Fork, the method consumeRecordStream:
@SuppressWarnings(value = "RV_RETURN_VALUE_IGNORED", justification = "We actually don't care about the return value of subscribe.")
public void consumeRecordStream(RecordStreamWithMetadata<D, S> stream)
    throws RecordStreamProcessor.StreamProcessingException {
  if (this.converter instanceof MultiConverter) {
    // if multiconverter, unpack it
    for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
      stream = cverter.processStream(stream, this.taskState);
    }
  } else {
    stream = this.converter.processStream(stream, this.taskState);
  }
  stream = this.rowLevelPolicyChecker.processStream(stream, this.taskState);
  stream = stream.mapStream(s -> s.map(r -> {
    onEachRecord();
    return r;
  }));
  stream = stream.mapStream(s -> s.doOnSubscribe(subscription -> onStart()));
  stream = stream.mapStream(s -> s.doOnComplete(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
  stream = stream.mapStream(s -> s.doOnCancel(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
  stream = stream.mapStream(s -> s.doOnError(exc -> {
    verifyAndSetForkState(ForkState.RUNNING, ForkState.FAILED);
    this.logger.error(String.format("Fork %d of task %s failed to process data records", this.index, this.taskId), exc);
  }));
  stream = stream.mapStream(s -> s.doFinally(this::cleanup));
  stream.getRecordStream().subscribe(r -> {
    if (r instanceof RecordEnvelope) {
      this.writer.get().writeEnvelope((RecordEnvelope) r);
    } else if (r instanceof ControlMessage) {
      this.writer.get().getMessageHandler().handleMessage((ControlMessage) r);
      r.ack();
    }
  }, e -> logger.error("Failed to process record.", e), () -> {
    if (this.writer.isPresent()) {
      this.writer.get().close();
    }
  });
}
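consumeRecordStream is essentially layering RxJava lifecycle hooks (doOnSubscribe, doOnComplete, doOnCancel, doOnError, doFinally) onto the record Flowable before the terminal subscribe writes records and dispatches control messages. A stripped-down, self-contained sketch of the same decoration pattern using plain RxJava 2 and no Gobblin types, purely for illustration:

import io.reactivex.Flowable;

public class StreamLifecycleDemo {
  public static void main(String[] args) {
    // Decorate the stream with lifecycle callbacks, mirroring the fork-state transitions above.
    Flowable<String> records = Flowable.just("a", "b", "c")
        .doOnSubscribe(subscription -> System.out.println("fork started"))   // onStart()
        .doOnComplete(() -> System.out.println("fork succeeded"))            // RUNNING -> SUCCEEDED
        .doOnCancel(() -> System.out.println("fork cancelled"))              // RUNNING -> SUCCEEDED
        .doOnError(exc -> System.out.println("fork failed: " + exc))         // RUNNING -> FAILED
        .doFinally(() -> System.out.println("cleanup"));                     // cleanup()

    // Terminal subscribe: write each record, log errors, close the writer when done.
    records.subscribe(
        r -> System.out.println("write " + r),
        e -> System.err.println("Failed to process record: " + e),
        () -> System.out.println("close writer"));
  }
}

Building the stream this way keeps the per-record work (writeEnvelope, control-message handling) in the terminal subscriber, while state transitions and cleanup are attached declaratively to the stream itself.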