Use of org.apache.beam.runners.flink.metrics.ReaderInvocationUtil in the Apache Beam project.
Class UnboundedSourceWrapper, method run.
/**
 * Drives the unbounded readers owned by this subtask until {@code isRunning} becomes false.
 *
 * <p>Behaviour depends on how many entries {@code localReaders} holds:
 *
 * <ul>
 *   <li>none: emit a {@code Long.MAX_VALUE} watermark and idle, waking up once a second to
 *       re-check {@code isRunning};
 *   <li>one: start the reader, then poll it in a loop, sleeping 50 ms whenever it reports no data;
 *   <li>several: start every reader, then poll them round-robin, sleeping 50 ms after a complete
 *       pass in which no reader reported data.
 * </ul>
 *
 * <p>In both reading cases the watermark timer is armed via {@code setNextWatermarkTimer} once
 * the readers have been started.
 *
 * @param ctx source context that elements and watermarks are emitted to
 * @throws Exception anything thrown by the reader invocations or {@code emitElement}, and
 *     {@link InterruptedException} if the thread is interrupted while sleeping between polls
 */
@Override
public void run(SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx) throws Exception {
  // remember the context in the `context` field
  context = ctx;

  FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
  ReaderInvocationUtil<OutputT, UnboundedSource.UnboundedReader<OutputT>> readerInvoker =
      new ReaderInvocationUtil<>(
          stepName, serializedOptions.getPipelineOptions(), metricContainer);

  if (localReaders.isEmpty()) {
    // do nothing, but still look busy ...
    // also, output a Long.MAX_VALUE watermark since we know that we're not
    // going to emit anything
    // we can't return here since Flink requires that all operators stay up,
    // otherwise checkpointing would not work correctly anymore
    ctx.emitWatermark(new Watermark(Long.MAX_VALUE));

    // wait until this is canceled
    final Object waitLock = new Object();
    while (isRunning) {
      try {
        //noinspection SynchronizationOnLocalVariableOrMethodParameter
        synchronized (waitLock) {
          // don't wait indefinitely, in case something goes horribly wrong
          waitLock.wait(1000);
        }
      } catch (InterruptedException e) {
        // an interrupt while still running is deliberately ignored: the loop keeps idling
        if (!isRunning) {
          // restore the interrupted state, and fall through the loop
          Thread.currentThread().interrupt();
        }
      }
    }
  } else if (localReaders.size() == 1) {
    // the easy case, we just read from one reader
    UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(0);

    boolean dataAvailable = readerInvoker.invokeStart(reader);
    if (dataAvailable) {
      emitElement(ctx, reader);
    }

    setNextWatermarkTimer(this.runtimeContext);

    while (isRunning) {
      dataAvailable = readerInvoker.invokeAdvance(reader);
      if (dataAvailable) {
        emitElement(ctx, reader);
      } else {
        // nothing to read right now; back off briefly instead of spinning
        Thread.sleep(50);
      }
    }
  } else {
    // a bit more complicated, we are responsible for several localReaders
    // loop through them and sleep if none of them had any data
    int numReaders = localReaders.size();
    int currentReader = 0;

    // start each reader and emit data if immediately available
    for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
      boolean dataAvailable = readerInvoker.invokeStart(reader);
      if (dataAvailable) {
        emitElement(ctx, reader);
      }
    }

    // Arm the watermark timer here as well, exactly as the single-reader branch does once its
    // reader is started; otherwise this branch never schedules one from run().
    setNextWatermarkTimer(this.runtimeContext);

    // a flag telling us whether any of the localReaders had data
    // if no reader had data, sleep for bit
    boolean hadData = false;
    while (isRunning) {
      UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(currentReader);
      boolean dataAvailable = readerInvoker.invokeAdvance(reader);
      if (dataAvailable) {
        emitElement(ctx, reader);
        hadData = true;
      }

      currentReader = (currentReader + 1) % numReaders;
      if (currentReader == 0) {
        // a full pass over all readers has just finished
        if (!hadData) {
          Thread.sleep(50);
        } else {
          hadData = false;
        }
      }
    }
  }
}
Use of org.apache.beam.runners.flink.metrics.ReaderInvocationUtil in the Apache Beam project.
Class BoundedSourceWrapper, method run.
/**
 * Reads the bounded splits assigned to this subtask until they are exhausted (or
 * {@code isRunning} becomes false) and then emits a final {@code Long.MAX_VALUE} watermark.
 *
 * <p>Split {@code i} of {@code splitSources} is handled by the subtask whose index equals
 * {@code i % numSubtasks}. A reader is created for every local split and stored in the
 * {@code readers} field. With exactly one reader it is simply drained; otherwise the readers are
 * polled round-robin.
 *
 * <p>In the multi-reader path a reader that reports no more data is removed from {@code readers}
 * and closed on the spot, because nothing else holds a reference to it afterwards. Readers that
 * are still in {@code readers} when this method returns are not closed here.
 * NOTE(review): confirm the wrapper's own close path releases those.
 *
 * @param ctx source context that elements and the final watermark are emitted to
 * @throws Exception anything thrown while creating, starting, advancing or closing a reader, or
 *     by {@code emitElement}
 */
@Override
public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
  // figure out which split sources we're responsible for
  int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
  int numSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();

  List<BoundedSource<OutputT>> localSources = new ArrayList<>();
  for (int i = 0; i < splitSources.size(); i++) {
    if (i % numSubtasks == subtaskIndex) {
      localSources.add(splitSources.get(i));
    }
  }

  LOG.info("Bounded Flink Source {}/{} is reading from sources: {}", subtaskIndex, numSubtasks, localSources);

  FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
  ReaderInvocationUtil<OutputT, BoundedSource.BoundedReader<OutputT>> readerInvoker =
      new ReaderInvocationUtil<>(
          stepName, serializedOptions.getPipelineOptions(), metricContainer);

  // initialize readers from scratch
  readers = new ArrayList<>();
  for (BoundedSource<OutputT> source : localSources) {
    readers.add(source.createReader(serializedOptions.getPipelineOptions()));
  }

  if (readers.size() == 1) {
    // the easy case, we just read from one reader
    BoundedSource.BoundedReader<OutputT> reader = readers.get(0);

    // A bounded reader that reports no data is exhausted, so advance() is only called while
    // the previous start()/advance() actually produced an element.
    boolean dataAvailable = readerInvoker.invokeStart(reader);
    while (dataAvailable) {
      emitElement(ctx, reader);
      if (!isRunning) {
        break;
      }
      dataAvailable = readerInvoker.invokeAdvance(reader);
    }
  } else {
    // a bit more complicated, we are responsible for several readers (or none at all)

    // start each reader and emit data if immediately available;
    // a reader with nothing to offer at start is already exhausted, so retire it right away
    int startIndex = 0;
    while (startIndex < readers.size()) {
      BoundedSource.BoundedReader<OutputT> reader = readers.get(startIndex);
      if (readerInvoker.invokeStart(reader)) {
        emitElement(ctx, reader);
        startIndex++;
      } else {
        // removal shifts the next reader into startIndex, so the index is not advanced
        readers.remove(startIndex);
        reader.close();
      }
    }

    // Round-robin over the remaining readers. There is no back-off sleep: for a bounded
    // reader "no data" means "finished", so there is never anything to wait for.
    int currentReader = 0;
    while (isRunning && !readers.isEmpty()) {
      BoundedSource.BoundedReader<OutputT> reader = readers.get(currentReader);
      if (readerInvoker.invokeAdvance(reader)) {
        emitElement(ctx, reader);
        currentReader = (currentReader + 1) % readers.size();
      } else {
        // exhausted: drop it from the rotation and release it, since no other reference
        // to this reader remains once it leaves the list
        readers.remove(currentReader);
        reader.close();
        // the following reader slid into this slot; wrap around if the tail was removed
        if (currentReader >= readers.size()) {
          currentReader = 0;
        }
      }
    }
  }

  // emit final Long.MAX_VALUE watermark, just to be sure
  ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
}
Aggregations