Usage example of org.apache.gobblin.records.RecordStreamWithMetadata from the Apache incubator-gobblin project:
class ForkerTest, method test().
@Test
public void test() throws Exception {
    // Fork one input stream into three branches and check that records are routed
    // according to the per-branch flags, while control messages fan out to all branches.
    MyFlowable<StreamEntity<byte[]>> source = new MyFlowable<>();
    RecordStreamWithMetadata<byte[], String> inputStream = new RecordStreamWithMetadata<>(source,
        GlobalMetadata.<String>builder().schema("schema").build());
    WorkUnitState state = new WorkUnitState();
    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "3");
    Forker.ForkedStream<byte[], String> forked = new Forker().forkStream(inputStream, new MyForkOperator(), state);
    Assert.assertEquals(forked.getForkedStreams().size(), 3);

    // Drain each branch into its own queue so emissions can be inspected synchronously.
    Queue<StreamEntity<byte[]>> branch0 = new LinkedList<>();
    Queue<StreamEntity<byte[]>> branch1 = new LinkedList<>();
    Queue<StreamEntity<byte[]>> branch2 = new LinkedList<>();
    forked.getForkedStreams().get(0).getRecordStream().subscribe(branch0::add);
    forked.getForkedStreams().get(1).getRecordStream().subscribe(branch1::add);
    forked.getForkedStreams().get(2).getRecordStream().subscribe(branch2::add);

    // Flags {1,1,1}: the record reaches every branch.
    source._subscriber.onNext(new RecordEnvelope<>(new byte[] { 1, 1, 1 }));
    Assert.assertTrue(branch0.poll() instanceof RecordEnvelope);
    Assert.assertTrue(branch1.poll() instanceof RecordEnvelope);
    Assert.assertTrue(branch2.poll() instanceof RecordEnvelope);

    // Flags {1,0,0}: only branch 0 receives the record.
    source._subscriber.onNext(new RecordEnvelope<>(new byte[] { 1, 0, 0 }));
    Assert.assertTrue(branch0.poll() instanceof RecordEnvelope);
    Assert.assertNull(branch1.poll());
    Assert.assertNull(branch2.poll());

    // Flags {0,1,1}: branches 1 and 2 receive the record, branch 0 does not.
    source._subscriber.onNext(new RecordEnvelope<>(new byte[] { 0, 1, 1 }));
    Assert.assertNull(branch0.poll());
    Assert.assertTrue(branch1.poll() instanceof RecordEnvelope);
    Assert.assertTrue(branch2.poll() instanceof RecordEnvelope);

    // Control messages are broadcast to every branch regardless of routing flags.
    source._subscriber.onNext(new BasicTestControlMessage<byte[]>("control"));
    Assert.assertTrue(branch0.poll() instanceof BasicTestControlMessage);
    Assert.assertTrue(branch1.poll() instanceof BasicTestControlMessage);
    Assert.assertTrue(bran2Safe(branch2));

    source._subscriber.onComplete();
}

// Readability helper: asserts-style check that the head of the queue is a control message.
private static boolean branch2Safe(Queue<StreamEntity<byte[]>> q) {
    return q.poll() instanceof BasicTestControlMessage;
}
Usage example of org.apache.gobblin.records.RecordStreamWithMetadata from the Apache incubator-gobblin project:
class AsyncConverter1to1Test, method testFailedConversion().
@Test
public void testFailedConversion() throws Exception {
    // A conversion failure injected by the converter must surface through the
    // stream's onError path, wrapped with the injected cause.
    WorkUnitState state = new WorkUnitState();
    state.setProp(AsyncConverter1to1.MAX_CONCURRENT_ASYNC_CONVERSIONS_KEY, 3);

    RecordStreamWithMetadata<String, String> inputStream = new RecordStreamWithMetadata<>(
        Flowable.just("0", MyAsyncConverter1to1.FAIL, "1").map(RecordEnvelope::new),
        GlobalMetadata.<String>builder().schema("schema").build());

    MyAsyncConverter1to1 converter = new MyAsyncConverter1to1();
    Set<String> converted = Sets.newConcurrentHashSet();
    List<Throwable> failures = Lists.newArrayList();
    AtomicBoolean completed = new AtomicBoolean(false);

    converter.processStream(inputStream, state).getRecordStream()
        .subscribeOn(Schedulers.newThread())
        .subscribe(r -> converted.add(((RecordEnvelope<String>) r).getRecord()),
            failures::add,
            () -> completed.set(true));

    // Wait for exactly one error to propagate, then verify its cause message.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> failures.size() > 0).await());
    Assert.assertEquals(failures.size(), 1);
    Assert.assertEquals(failures.get(0).getCause().getMessage(), "injected failure");
}
Usage example of org.apache.gobblin.records.RecordStreamWithMetadata from the Apache incubator-gobblin project:
class AsyncConverter1to1Test, method test1to1().
@Test
public void test1to1() throws Exception {
    // Verifies that the async converter caps in-flight conversions at the configured
    // maximum (3) and that conversions may complete out of order while every record
    // still reaches the output.
    WorkUnitState state = new WorkUnitState();
    state.setProp(AsyncConverter1to1.MAX_CONCURRENT_ASYNC_CONVERSIONS_KEY, 3);

    RecordStreamWithMetadata<String, String> inputStream = new RecordStreamWithMetadata<>(
        Flowable.range(0, 5).map(i -> i.toString()).map(RecordEnvelope::new),
        GlobalMetadata.<String>builder().schema("schema").build());

    MyAsyncConverter1to1 asyncConverter = new MyAsyncConverter1to1();
    Set<String> results = Sets.newConcurrentHashSet();
    List<Throwable> failures = Lists.newArrayList();
    AtomicBoolean completed = new AtomicBoolean(false);

    asyncConverter.processStream(inputStream, state).getRecordStream()
        .subscribeOn(Schedulers.newThread())
        .subscribe(r -> results.add(((RecordEnvelope<String>) r).getRecord()),
            failures::add,
            () -> completed.set(true));

    // Release record 0 and wait for it to appear downstream.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> asyncConverter.completeFutureIfPresent("0")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> results.contains("0")).await());
    Assert.assertEquals(results.size(), 1);

    // Record 4 must not be queued yet: at most 3 conversions may be in flight.
    Assert.assertFalse(ExponentialBackoff.awaitCondition().maxWait(200L).callable(() -> asyncConverter.completeFutureIfPresent("4")).await());

    // Release record 3 out of order.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> asyncConverter.completeFutureIfPresent("3")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> results.contains("3")).await());
    // Only two records have been released so far.
    Assert.assertEquals(results.size(), 2);

    // Record 4 can now enter the in-flight window; release it.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> asyncConverter.completeFutureIfPresent("4")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> results.contains("4")).await());
    Assert.assertEquals(results.size(), 3);

    // Release the remaining records 1 and 2, then verify the complete output.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> asyncConverter.completeFutureIfPresent("1")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> asyncConverter.completeFutureIfPresent("2")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> results.size() == 5).await());
    Assert.assertEquals(results, Sets.newHashSet("0", "1", "2", "3", "4"));
    Assert.assertTrue(failures.isEmpty());
    Assert.assertTrue(completed.get());
}
Usage example of org.apache.gobblin.records.RecordStreamWithMetadata from the Apache incubator-gobblin project:
class StreamModelTaskRunner, method run().
/**
 * Runs the stream-model task pipeline: obtains the extractor's record stream,
 * optionally attaches watermark tracking (streaming tasks), applies either the
 * configured {@code RecordStreamProcessor}s or the converter chain, runs row-level
 * quality checks, forks the stream into branches, and subscribes a {@link Fork} to
 * each branch. Blocks until all forks finish or a 60-minute timeout elapses.
 *
 * @throws TimeoutException if the forks do not all complete within the timeout
 * @throws Exception on any failure while building or running the pipeline
 */
protected void run() throws Exception {
    // Get the fork operator. By default IdentityForkOperator is used with a single branch.
    ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
    RecordStreamWithMetadata<?, ?> stream = this.extractor.recordStream(this.shutdownRequested);
    // publish() makes the upstream shareable by all forks; no records flow until
    // connect() is invoked below, after every fork has subscribed.
    ConnectableFlowable connectableStream = stream.getRecordStream().publish();
    stream = stream.withRecordStream(connectableStream);
    // Count every extracted record.
    stream = stream.mapRecords(r -> {
        this.task.onRecordExtract();
        return r;
    });
    if (this.task.isStreamingTask()) {
        // Start watermark manager and tracker.
        if (this.watermarkTracker.isPresent()) {
            this.watermarkTracker.get().start();
        }
        // NOTE(review): watermarkManager is dereferenced without an isPresent() check,
        // unlike watermarkTracker above — presumably it is always present for streaming
        // tasks; confirm against task construction.
        this.watermarkManager.get().start();
        ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());
        // Attach an acknowledgeable watermark to each record so progress can be tracked.
        stream = stream.mapRecords(r -> {
            AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(r.getWatermark());
            if (watermarkTracker.isPresent()) {
                watermarkTracker.get().track(ackableWatermark);
            }
            r.addCallBack(ackableWatermark);
            return r;
        });
    }
    // Use the recordStreamProcessor list if it is configured; otherwise fall back to
    // the converter chain (unpacking a MultiConverter into its member converters).
    if (!this.recordStreamProcessors.isEmpty()) {
        for (RecordStreamProcessor streamProcessor : this.recordStreamProcessors) {
            stream = streamProcessor.processStream(stream, this.taskState);
        }
    } else {
        if (this.converter instanceof MultiConverter) {
            // if multiconverter, unpack it
            for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
                stream = cverter.processStream(stream, this.taskState);
            }
        } else {
            stream = this.converter.processStream(stream, this.taskState);
        }
    }
    stream = this.rowChecker.processStream(stream, this.taskState);
    Forker.ForkedStream<?, ?> forkedStreams = new Forker().forkStream(stream, forkOperator, this.taskState);
    // Run forks asynchronously when there is more than one branch, or when
    // single-branch tasks are configured to run asynchronously.
    boolean isForkAsync = !this.task.areSingleBranchTasksSynchronous(this.taskContext) || forkedStreams.getForkedStreams().size() > 1;
    int bufferSize = this.taskState.getPropAsInt(ConfigurationKeys.FORK_RECORD_QUEUE_CAPACITY_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_CAPACITY);
    for (int fidx = 0; fidx < forkedStreams.getForkedStreams().size(); fidx++) {
        RecordStreamWithMetadata<?, ?> forkedStream = forkedStreams.getForkedStreams().get(fidx);
        if (forkedStream != null) {
            if (isForkAsync) {
                // Hand records to the fork executor; delayError=false propagates errors
                // immediately, with a bounded prefetch buffer of bufferSize records.
                forkedStream = forkedStream.mapStream(f -> f.observeOn(Schedulers.from(this.taskExecutor.getForkExecutor()), false, bufferSize));
            }
            Fork fork = new Fork(this.taskContext, forkedStream.getGlobalMetadata().getSchema(), forkedStreams.getForkedStreams().size(), fidx, this.taskMode);
            fork.consumeRecordStream(forkedStream);
            this.forks.put(Optional.of(fork), Optional.of(Futures.immediateFuture(null)));
            this.task.configureStreamingFork(fork, this.watermarkingStrategy);
        }
    }
    // Start the flow of records now that every fork is subscribed.
    connectableStream.connect();
    if (!ExponentialBackoff.awaitCondition().callable(() -> this.forks.keySet().stream().map(Optional::get).allMatch(Fork::isDone)).initialDelay(1000L).maxDelay(1000L).maxWait(TimeUnit.MINUTES.toMillis(60)).await()) {
        // Fixed typo in the message: "withing" -> "within".
        throw new TimeoutException("Forks did not finish within specified timeout.");
    }
}
Usage example of org.apache.gobblin.records.RecordStreamWithMetadata from the Apache incubator-gobblin project:
class Fork, method consumeRecordStream().
/**
 * Builds this fork's processing pipeline on top of the supplied record stream and
 * subscribes to it: applies the fork's converter(s) (unpacking a MultiConverter into
 * its member converters), runs the row-level policy checker, ties the fork's state
 * machine to stream lifecycle events, and finally writes each record via the writer.
 *
 * NOTE(review): ControlMessages are acked here after the handler runs, but
 * RecordEnvelopes are not acked in this subscriber — presumably the writer
 * acknowledges them; confirm against the writer's contract.
 */
@SuppressWarnings(value = "RV_RETURN_VALUE_IGNORED", justification = "We actually don't care about the return value of subscribe.")
public void consumeRecordStream(RecordStreamWithMetadata<D, S> stream) throws RecordStreamProcessor.StreamProcessingException {
    if (this.converter instanceof MultiConverter) {
        // if multiconverter, unpack it
        for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
            stream = cverter.processStream(stream, this.taskState);
        }
    } else {
        stream = this.converter.processStream(stream, this.taskState);
    }
    stream = this.rowLevelPolicyChecker.processStream(stream, this.taskState);
    // Count every record flowing through the fork.
    stream = stream.mapStream(s -> s.map(r -> {
        onEachRecord();
        return r;
    }));
    // Lifecycle wiring: mark the fork started on subscription, and transition
    // RUNNING -> SUCCEEDED on normal completion or cancellation, RUNNING -> FAILED on error.
    stream = stream.mapStream(s -> s.doOnSubscribe(subscription -> onStart()));
    stream = stream.mapStream(s -> s.doOnComplete(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(s -> s.doOnCancel(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(s -> s.doOnError(exc -> {
        verifyAndSetForkState(ForkState.RUNNING, ForkState.FAILED);
        this.logger.error(String.format("Fork %d of task %s failed to process data records", this.index, this.taskId), exc);
    }));
    // Always clean up, whatever terminates the stream.
    stream = stream.mapStream(s -> s.doFinally(this::cleanup));
    stream.getRecordStream().subscribe(r -> {
        if (r instanceof RecordEnvelope) {
            this.writer.get().writeEnvelope((RecordEnvelope) r);
        } else if (r instanceof ControlMessage) {
            // Forward control messages to the writer's handler, then acknowledge.
            this.writer.get().getMessageHandler().handleMessage((ControlMessage) r);
            r.ack();
        }
    }, e -> logger.error("Failed to process record.", e), () -> {
        // Close the writer on normal completion, if one was created.
        if (this.writer.isPresent()) {
            this.writer.get().close();
        }
    });
}
End of aggregated usage examples for RecordStreamWithMetadata.