Search in sources :

Example 1 with UnboundedSourceWrapper

use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.

the class FlinkStreamingTransformTranslatorsTest method readSourceTranslatorUnboundedWithoutMaxParallelism.

/**
 * Translates an unbounded read on an environment that only has its parallelism configured
 * (no max parallelism is set here) and checks that the resulting {@code UnboundedSourceWrapper}
 * holds as many split sources as the configured parallelism.
 */
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {
    final int expectedSplits = 2;

    // Only the parallelism is configured on the environment in this scenario.
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(expectedSplits);

    SplittableParDo.PrimitiveUnboundedRead<String> unboundedRead =
        new SplittableParDo.PrimitiveUnboundedRead<>(Read.from(new TestUnboundedSource()));

    OneInputTransformation<?, ?> translated =
        (OneInputTransformation)
            applyReadSourceTransform(unboundedRead, PCollection.IsBounded.UNBOUNDED, environment);

    // Walk from the translated transformation back to its single input and pull out the
    // user function of that input's operator, which is the wrapper under test.
    UnboundedSourceWrapper wrapper =
        (UnboundedSourceWrapper)
            SourceTransformationCompat.getOperator(StreamSources.getOnlyInput(translated))
                .getUserFunction();

    int actualSplits = wrapper.getSplitSources().size();
    assertEquals(expectedSplits, actualSplits);
}
Also used : SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) UnboundedSourceWrapper(org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 2 with UnboundedSourceWrapper

use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.

the class BoundedSourceRestoreTest method testRestore.

/**
 * Reads a first batch of elements from a bounded source wrapped as an unbounded source, takes a
 * snapshot, and then restores a freshly created wrapper from that snapshot to read the remaining
 * elements. The test passes when both runs together have collected {@code numElements} distinct
 * values in {@code emittedElements}.
 *
 * @throws Exception if opening, snapshotting, or restoring one of the test harnesses fails
 */
@Test
public void testRestore() throws Exception {
    final int numElements = 102;
    final int firstBatchSize = 23;
    final int secondBatchSize = numElements - firstBatchSize;
    final Set<Long> emittedElements = new HashSet<>();
    final Object checkpointLock = new Object();
    PipelineOptions options = PipelineOptionsFactory.create();
    // bounded source wrapped as unbounded source
    BoundedSource<Long> source = CountingSource.upTo(numElements);
    BoundedToUnboundedSourceAdapter<Long> unboundedSource = new BoundedToUnboundedSourceAdapter<>(source);
    UnboundedSourceWrapper<Long, Checkpoint<Long>> flinkWrapper = new UnboundedSourceWrapper<>("stepName", options, unboundedSource, numSplits);
    StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>> sourceOperator = new StreamSource<>(flinkWrapper);
    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> testHarness =
        new AbstractStreamOperatorTestHarness<>(
            sourceOperator,
            numTasks /* max parallelism */,
            numTasks /* parallelism */,
            0 /* subtask index */);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    // Only the first batch (firstBatchSize of numElements) is read here, not the whole source.
    // NOTE(review): PartialCollector presumably throws SuccessException once it has collected
    // the requested number of elements -- confirm against its definition.
    boolean readFirstBatchOfElements = false;
    try {
        testHarness.open();
        StreamSources.run(sourceOperator, checkpointLock, new PartialCollector<>(emittedElements, firstBatchSize));
    } catch (SuccessException e) {
        // success
        readFirstBatchOfElements = true;
    }
    assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);
    // draw a snapshot
    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    // finalize checkpoint; the tracker list is registered but not inspected by this test
    final ArrayList<Integer> finalizeList = new ArrayList<>();
    TestCountingSource.setFinalizeTracker(finalizeList);
    testHarness.notifyOfCompletedCheckpoint(0);
    // create a completely new source but restore from the snapshot
    BoundedSource<Long> restoredSource = CountingSource.upTo(numElements);
    BoundedToUnboundedSourceAdapter<Long> restoredUnboundedSource = new BoundedToUnboundedSourceAdapter<>(restoredSource);
    UnboundedSourceWrapper<Long, Checkpoint<Long>> restoredFlinkWrapper = new UnboundedSourceWrapper<>("stepName", options, restoredUnboundedSource, numSplits);
    StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>> restoredSourceOperator = new StreamSource<>(restoredFlinkWrapper);
    // set parallelism to 1 to ensure that our testing operator gets all checkpointed state
    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> restoredTestHarness =
        new AbstractStreamOperatorTestHarness<>(
            restoredSourceOperator,
            numTasks /* max parallelism */,
            1 /* parallelism */,
            0 /* subtask index */);
    restoredTestHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    // restore snapshot
    restoredTestHarness.initializeState(snapshot);
    // run again and verify that we see the other elements
    boolean readSecondBatchOfElements = false;
    try {
        restoredTestHarness.open();
        StreamSources.run(restoredSourceOperator, checkpointLock, new PartialCollector<>(emittedElements, secondBatchSize));
    } catch (SuccessException e) {
        // success
        readSecondBatchOfElements = true;
    }
    assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);
    // verify that both runs together produced numElements distinct elements; the message makes
    // a failure report the observed count instead of a bare AssertionError
    assertTrue(
        "Expected " + numElements + " distinct elements in total but saw " + emittedElements.size(),
        emittedElements.size() == numElements);
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) Checkpoint(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) Checkpoint(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint) BoundedToUnboundedSourceAdapter(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) UnboundedSourceWrapper(org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with UnboundedSourceWrapper

use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.

the class FlinkStreamingTransformTranslatorsTest method readSourceTranslatorUnboundedWithMaxParallelism.

/**
 * Translates an unbounded read on an environment that has both a parallelism and a larger max
 * parallelism configured, and checks that the resulting {@code UnboundedSourceWrapper} holds as
 * many split sources as the max parallelism.
 */
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {
    final int configuredParallelism = 2;
    final int expectedSplits = 6;

    // Parallelism is set first, then max parallelism, as two separate environment settings.
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(configuredParallelism);
    environment.setMaxParallelism(expectedSplits);

    SplittableParDo.PrimitiveUnboundedRead<String> unboundedRead =
        new SplittableParDo.PrimitiveUnboundedRead<>(Read.from(new TestUnboundedSource()));

    OneInputTransformation<?, ?> translated =
        (OneInputTransformation)
            applyReadSourceTransform(unboundedRead, PCollection.IsBounded.UNBOUNDED, environment);

    // Walk from the translated transformation back to its single input and pull out the
    // user function of that input's operator, which is the wrapper under test.
    UnboundedSourceWrapper wrapper =
        (UnboundedSourceWrapper)
            SourceTransformationCompat.getOperator(StreamSources.getOnlyInput(translated))
                .getUserFunction();

    int actualSplits = wrapper.getSplitSources().size();
    assertEquals(expectedSplits, actualSplits);
}
Also used : SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) UnboundedSourceWrapper(org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Aggregations

UnboundedSourceWrapper (org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper)3 Test (org.junit.Test)3 SplittableParDo (org.apache.beam.runners.core.construction.SplittableParDo)2 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)2 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)2 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter)1 Checkpoint (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint)1 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)1 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)1 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)1