Search in sources :

Example 1 with ReaderInvocationUtil

use of org.apache.beam.runners.flink.metrics.ReaderInvocationUtil in project beam by apache.

the class UnboundedSourceWrapper method run.

/**
 * Drives the local unbounded readers until {@code isRunning} becomes false.
 *
 * <p>Three cases are handled, depending on how many readers are in {@code localReaders}:
 * <ul>
 *   <li>none: emit a {@code Long.MAX_VALUE} watermark and idle until canceled;</li>
 *   <li>one: start the reader, then advance it in a loop, sleeping 50 ms whenever it
 *       reports no data;</li>
 *   <li>several: start all readers, then advance them round-robin, sleeping 50 ms after
 *       a full round in which none of them produced data.</li>
 * </ul>
 *
 * @param ctx the source context that elements and watermarks are emitted to; it is also
 *            stored in the {@code context} field
 * @throws Exception if a reader invocation or element emission fails, or if the thread is
 *                   interrupted while sleeping between polls
 */
@Override
public void run(SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx) throws Exception {
    context = ctx;
    FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
    ReaderInvocationUtil<OutputT, UnboundedSource.UnboundedReader<OutputT>> readerInvoker = new ReaderInvocationUtil<>(stepName, serializedOptions.getPipelineOptions(), metricContainer);
    if (localReaders.size() == 0) {
        // do nothing, but still look busy ...
        // also, output a Long.MAX_VALUE watermark since we know that we're not
        // going to emit anything
        // we can't return here since Flink requires that all operators stay up,
        // otherwise checkpointing would not work correctly anymore
        ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
        // wait until this is canceled
        final Object waitLock = new Object();
        while (isRunning) {
            try {
                //noinspection SynchronizationOnLocalVariableOrMethodParameter
                synchronized (waitLock) {
                    // don't wait indefinitely, in case something goes horribly wrong
                    waitLock.wait(1000);
                }
            } catch (InterruptedException e) {
                // an interrupt while still running is deliberately ignored: the loop
                // simply waits again until isRunning is cleared
                if (!isRunning) {
                    // restore the interrupted state, and fall through the loop
                    Thread.currentThread().interrupt();
                }
            }
        }
    } else if (localReaders.size() == 1) {
        // the easy case, we just read from one reader
        UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(0);
        boolean dataAvailable = readerInvoker.invokeStart(reader);
        if (dataAvailable) {
            emitElement(ctx, reader);
        }
        setNextWatermarkTimer(this.runtimeContext);
        while (isRunning) {
            dataAvailable = readerInvoker.invokeAdvance(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
            } else {
                // nothing to read right now; back off briefly instead of spinning
                Thread.sleep(50);
            }
        }
    } else {
        // a bit more complicated, we are responsible for several localReaders
        // loop through them and sleep if none of them had any data
        int numReaders = localReaders.size();
        int currentReader = 0;
        // start each reader and emit data if immediately available
        for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
            boolean dataAvailable = readerInvoker.invokeStart(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
            }
        }
        // Arm the watermark timer once all readers are started, mirroring the
        // single-reader branch. Previously this branch never made the call, so
        // (assuming setNextWatermarkTimer is what schedules watermark emission --
        // confirm against its definition) subtasks owning several readers would
        // not have had the timer armed from this method.
        setNextWatermarkTimer(this.runtimeContext);
        // a flag telling us whether any of the localReaders had data
        // if no reader had data, sleep for bit
        boolean hadData = false;
        while (isRunning) {
            UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(currentReader);
            boolean dataAvailable = readerInvoker.invokeAdvance(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
                hadData = true;
            }
            currentReader = (currentReader + 1) % numReaders;
            // currentReader wrapping back to 0 marks the end of one full round
            if (currentReader == 0 && !hadData) {
                Thread.sleep(50);
            } else if (currentReader == 0) {
                hadData = false;
            }
        }
    }
}
Also used : ReaderInvocationUtil(org.apache.beam.runners.flink.metrics.ReaderInvocationUtil) Watermark(org.apache.flink.streaming.api.watermark.Watermark) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer)

Example 2 with ReaderInvocationUtil

use of org.apache.beam.runners.flink.metrics.ReaderInvocationUtil in project beam by apache.

the class BoundedSourceWrapper method run.

/**
 * Reads every split source assigned to this subtask to exhaustion (or until
 * {@code isRunning} becomes false) and then emits a final {@code Long.MAX_VALUE}
 * watermark.
 *
 * <p>Split sources are assigned by index: source {@code i} belongs to this subtask when
 * {@code i % numSubtasks == subtaskIndex}. A reader is created for each assigned source
 * and stored in the {@code readers} field. With exactly one reader it is simply drained;
 * otherwise the readers are advanced round-robin, and each reader is closed and dropped
 * from {@code readers} as soon as it reports no more data.
 *
 * @param ctx the source context that elements and the final watermark are emitted to
 * @throws Exception if creating, reading from or closing a reader fails, if element
 *                   emission fails, or if the thread is interrupted while sleeping
 */
@Override
public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
    // figure out which split sources we're responsible for
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    int numSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();
    List<BoundedSource<OutputT>> localSources = new ArrayList<>();
    for (int i = 0; i < splitSources.size(); i++) {
        if (i % numSubtasks == subtaskIndex) {
            localSources.add(splitSources.get(i));
        }
    }
    LOG.info("Bounded Flink Source {}/{} is reading from sources: {}", subtaskIndex, numSubtasks, localSources);
    FlinkMetricContainer metricContainer = new FlinkMetricContainer(getRuntimeContext());
    ReaderInvocationUtil<OutputT, BoundedSource.BoundedReader<OutputT>> readerInvoker = new ReaderInvocationUtil<>(stepName, serializedOptions.getPipelineOptions(), metricContainer);
    readers = new ArrayList<>();
    // initialize readers from scratch
    for (BoundedSource<OutputT> source : localSources) {
        readers.add(source.createReader(serializedOptions.getPipelineOptions()));
    }
    if (readers.size() == 1) {
        // the easy case, we just read from one reader
        BoundedSource.BoundedReader<OutputT> reader = readers.get(0);
        boolean dataAvailable = readerInvoker.invokeStart(reader);
        if (dataAvailable) {
            emitElement(ctx, reader);
        }
        while (isRunning) {
            dataAvailable = readerInvoker.invokeAdvance(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
            } else {
                // the bounded reader is exhausted
                break;
            }
        }
    } else {
        // a bit more complicated, we are responsible for several readers
        // (or none at all, in which case both loops below are skipped)
        int currentReader = 0;
        // start each reader and emit data if immediately available
        for (BoundedSource.BoundedReader<OutputT> reader : readers) {
            boolean dataAvailable = readerInvoker.invokeStart(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
            }
        }
        // a flag telling us whether any of the readers had data
        // if no reader had data, sleep for bit
        boolean hadData = false;
        while (isRunning && !readers.isEmpty()) {
            BoundedSource.BoundedReader<OutputT> reader = readers.get(currentReader);
            boolean dataAvailable = readerInvoker.invokeAdvance(reader);
            if (dataAvailable) {
                emitElement(ctx, reader);
                hadData = true;
            } else {
                // This reader is exhausted. Close it before dropping it: once it is
                // removed from 'readers', nothing that later walks that list can
                // reach it, so its resources would otherwise leak.
                readers.remove(currentReader);
                reader.close();
                // step back so the increment below lands on the element that shifted
                // into the removed slot
                currentReader--;
                if (readers.isEmpty()) {
                    break;
                }
            }
            currentReader = (currentReader + 1) % readers.size();
            // currentReader being 0 is treated as the start of a new round
            if (currentReader == 0 && !hadData) {
                Thread.sleep(50);
            } else if (currentReader == 0) {
                hadData = false;
            }
        }
    }
    // emit final Long.MAX_VALUE watermark, just to be sure
    ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
}
Also used : ReaderInvocationUtil(org.apache.beam.runners.flink.metrics.ReaderInvocationUtil) BoundedSource(org.apache.beam.sdk.io.BoundedSource) ArrayList(java.util.ArrayList) Watermark(org.apache.flink.streaming.api.watermark.Watermark) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer)

Aggregations

FlinkMetricContainer (org.apache.beam.runners.flink.metrics.FlinkMetricContainer)2 ReaderInvocationUtil (org.apache.beam.runners.flink.metrics.ReaderInvocationUtil)2 Watermark (org.apache.flink.streaming.api.watermark.Watermark)2 ArrayList (java.util.ArrayList)1 BoundedSource (org.apache.beam.sdk.io.BoundedSource)1