Use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.
From the class FlinkStreamingTransformTranslatorsTest, method readSourceTranslatorUnboundedWithoutMaxParallelism:
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {
  final int parallelism = 2;

  SplittableParDo.PrimitiveUnboundedRead<String> transform =
      new SplittableParDo.PrimitiveUnboundedRead<>(Read.from(new TestUnboundedSource()));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          SourceTransformationCompat.getOperator(StreamSources.getOnlyInput(sourceTransform))
              .getUserFunction();

  // With no max parallelism configured, the wrapper splits the source by the
  // environment's parallelism.
  assertEquals(parallelism, source.getSplitSources().size());
}
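The test relies on a TestUnboundedSource stub that is not shown in the snippet. Below is a minimal sketch of what such a stub could look like, assuming its only job is to honor the requested split count; the class body is an assumption based on the assertions, not the actual Beam fixture, and imports are omitted as in the snippets above.

static class TestUnboundedSource extends UnboundedSource<String, TestUnboundedSource.NoMark> {

  // Trivial checkpoint mark; the translation-only tests never read from the
  // source, so there is nothing to checkpoint.
  static class NoMark implements UnboundedSource.CheckpointMark, Serializable {
    @Override
    public void finalizeCheckpoint() {}
  }

  @Override
  public List<TestUnboundedSource> split(int desiredNumSplits, PipelineOptions options) {
    // Honor the requested split count; the assertions above count these splits.
    List<TestUnboundedSource> splits = new ArrayList<>(desiredNumSplits);
    for (int i = 0; i < desiredNumSplits; i++) {
      splits.add(new TestUnboundedSource());
    }
    return splits;
  }

  @Override
  public UnboundedReader<String> createReader(PipelineOptions options, NoMark checkpointMark) {
    throw new UnsupportedOperationException("not needed for translation-only tests");
  }

  @Override
  public Coder<NoMark> getCheckpointMarkCoder() {
    return SerializableCoder.of(NoMark.class);
  }

  @Override
  public Coder<String> getOutputCoder() {
    return StringUtf8Coder.of();
  }
}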
Use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.
From the class BoundedSourceRestoreTest, method testRestore:
@Test
public void testRestore() throws Exception {
  final int numElements = 102;
  final int firstBatchSize = 23;
  final int secondBatchSize = numElements - firstBatchSize;
  final Set<Long> emittedElements = new HashSet<>();
  final Object checkpointLock = new Object();
  PipelineOptions options = PipelineOptionsFactory.create();

  // Bounded source wrapped as an unbounded source. numSplits and numTasks are
  // supplied by the enclosing (parameterized) test class.
  BoundedSource<Long> source = CountingSource.upTo(numElements);
  BoundedToUnboundedSourceAdapter<Long> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(source);
  UnboundedSourceWrapper<Long, Checkpoint<Long>> flinkWrapper =
      new UnboundedSourceWrapper<>("stepName", options, unboundedSource, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      sourceOperator = new StreamSource<>(flinkWrapper);

  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> testHarness =
      new AbstractStreamOperatorTestHarness<>(
          sourceOperator,
          numTasks, /* max parallelism */
          numTasks, /* parallelism */
          0 /* subtask index */);
  testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // Read the first batch of elements; the PartialCollector signals completion
  // by throwing SuccessException.
  boolean readFirstBatchOfElements = false;
  try {
    testHarness.open();
    StreamSources.run(
        sourceOperator, checkpointLock, new PartialCollector<>(emittedElements, firstBatchSize));
  } catch (SuccessException e) {
    // success
    readFirstBatchOfElements = true;
  }
  assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);

  // Draw a snapshot.
  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  // Finalize the checkpoint.
  final ArrayList<Integer> finalizeList = new ArrayList<>();
  TestCountingSource.setFinalizeTracker(finalizeList);
  testHarness.notifyOfCompletedCheckpoint(0);

  // Create a completely new source, but restore it from the snapshot.
  BoundedSource<Long> restoredSource = CountingSource.upTo(numElements);
  BoundedToUnboundedSourceAdapter<Long> restoredUnboundedSource =
      new BoundedToUnboundedSourceAdapter<>(restoredSource);
  UnboundedSourceWrapper<Long, Checkpoint<Long>> restoredFlinkWrapper =
      new UnboundedSourceWrapper<>("stepName", options, restoredUnboundedSource, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      restoredSourceOperator = new StreamSource<>(restoredFlinkWrapper);

  // Set parallelism to 1 so that our testing operator gets all the checkpointed state.
  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> restoredTestHarness =
      new AbstractStreamOperatorTestHarness<>(
          restoredSourceOperator,
          numTasks, /* max parallelism */
          1, /* parallelism */
          0 /* subtask index */);
  restoredTestHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // Restore from the snapshot.
  restoredTestHarness.initializeState(snapshot);

  // Run again and verify that we see the remaining elements.
  boolean readSecondBatchOfElements = false;
  try {
    restoredTestHarness.open();
    StreamSources.run(
        restoredSourceOperator,
        checkpointLock,
        new PartialCollector<>(emittedElements, secondBatchSize));
  } catch (SuccessException e) {
    // success
    readSecondBatchOfElements = true;
  }
  assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);

  // Verify that we saw all numElements elements across both runs.
  assertEquals(numElements, emittedElements.size());
}
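StreamSources.run blocks until the source is exhausted, so the test escapes each partial read by having the collector throw. Below is a hedged sketch of the PartialCollector / SuccessException pair used above; the real PartialCollector also implements Flink's Output interface, which this simplified version leaves out, and all implementation details here are assumptions based on how the test uses it.

static class SuccessException extends RuntimeException {}

static class PartialCollector<T> {

  private final Set<T> emittedElements;
  private final int elementsToConsume;
  private int consumed = 0;

  PartialCollector(Set<T> emittedElements, int elementsToConsume) {
    this.emittedElements = emittedElements;
    this.elementsToConsume = elementsToConsume;
  }

  // Record each element the source emits and abort the run once the batch is full.
  void collect(WindowedValue<ValueWithRecordId<T>> record) {
    emittedElements.add(record.getValue().getValue());
    consumed++;
    if (consumed == elementsToConsume) {
      // There is no clean way to stop a running unbounded source mid-stream, so
      // the test breaks out with an exception that the caller treats as success.
      throw new SuccessException();
    }
  }
}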
Use of org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper in project beam by apache.
From the class FlinkStreamingTransformTranslatorsTest, method readSourceTranslatorUnboundedWithMaxParallelism:
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  SplittableParDo.PrimitiveUnboundedRead<String> transform =
      new SplittableParDo.PrimitiveUnboundedRead<>(Read.from(new TestUnboundedSource()));

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          SourceTransformationCompat.getOperator(StreamSources.getOnlyInput(sourceTransform))
              .getUserFunction();

  // When a max parallelism is configured, it wins over the operator parallelism
  // as the desired number of splits.
  assertEquals(maxParallelism, source.getSplitSources().size());
}
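Together with readSourceTranslatorUnboundedWithoutMaxParallelism above, this pins down the split-count rule: the translator asks the source for as many splits as the configured max parallelism, falling back to the operator parallelism when none is set. A hedged sketch of that rule follows; chooseDesiredSplits is a hypothetical helper name, not a method in the Beam codebase, and it assumes Flink's convention of reporting an unset max parallelism as a negative value.

// Hypothetical helper illustrating the rule the two translator tests assert.
static int chooseDesiredSplits(StreamExecutionEnvironment env) {
  int maxParallelism = env.getMaxParallelism();
  return maxParallelism > 0 ? maxParallelism : env.getParallelism();
}

With parallelism 2 and no max parallelism this yields 2 splits (the first test); with a max parallelism of 6 it yields 6 (this test).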