Usage of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter in the Apache Beam project.
Class BoundedSourceRestoreTest, method testRestore.
/**
 * Exercises snapshot/restore of a bounded source that is wrapped as an unbounded source and run
 * inside a Flink {@code StreamSource} operator.
 *
 * <p>Steps: run the operator until the first batch has been collected, snapshot the harness,
 * notify it of the completed checkpoint, build a completely new operator, restore it from the
 * snapshot, and run it until the second batch has been collected. Both runs add to the same set,
 * which must end up holding {@code numElements} distinct values.
 *
 * <p>NOTE(review): {@code PartialCollector} and {@code SuccessException} are defined elsewhere;
 * this test assumes the collector aborts the run with {@code SuccessException} once it has seen
 * the requested number of elements — confirm against their definitions.
 *
 * @throws Exception if the harness or the source operator fails unexpectedly
 */
@Test
public void testRestore() throws Exception {
  final int numElements = 102;
  final int firstBatchSize = 23;
  final int secondBatchSize = numElements - firstBatchSize;
  final Set<Long> emittedElements = new HashSet<>();
  final Object checkpointLock = new Object();
  PipelineOptions options = PipelineOptionsFactory.create();

  // bounded source wrapped as unbounded source
  BoundedSource<Long> source = CountingSource.upTo(numElements);
  BoundedToUnboundedSourceAdapter<Long> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(source);
  UnboundedSourceWrapper<Long, Checkpoint<Long>> flinkWrapper =
      new UnboundedSourceWrapper<>("stepName", options, unboundedSource, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      sourceOperator = new StreamSource<>(flinkWrapper);
  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> testHarness =
      new AbstractStreamOperatorTestHarness<>(
          sourceOperator,
          numTasks /* max parallelism */,
          numTasks /* parallelism */,
          0 /* subtask index */);
  testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // read the first batch (firstBatchSize elements, not half of the input)
  boolean readFirstBatchOfElements = false;
  try {
    testHarness.open();
    StreamSources.run(
        sourceOperator, checkpointLock, new PartialCollector<>(emittedElements, firstBatchSize));
  } catch (SuccessException e) {
    // success
    readFirstBatchOfElements = true;
  }
  assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);

  // draw a snapshot
  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  // finalize checkpoint
  final ArrayList<Integer> finalizeList = new ArrayList<>();
  TestCountingSource.setFinalizeTracker(finalizeList);
  testHarness.notifyOfCompletedCheckpoint(0);

  // create a completely new source but restore from the snapshot
  BoundedSource<Long> restoredSource = CountingSource.upTo(numElements);
  BoundedToUnboundedSourceAdapter<Long> restoredUnboundedSource =
      new BoundedToUnboundedSourceAdapter<>(restoredSource);
  UnboundedSourceWrapper<Long, Checkpoint<Long>> restoredFlinkWrapper =
      new UnboundedSourceWrapper<>("stepName", options, restoredUnboundedSource, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      restoredSourceOperator = new StreamSource<>(restoredFlinkWrapper);

  // set parallelism to 1 to ensure that our testing operator gets all checkpointed state
  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> restoredTestHarness =
      new AbstractStreamOperatorTestHarness<>(
          restoredSourceOperator,
          numTasks /* max parallelism */,
          1 /* parallelism */,
          0 /* subtask index */);
  restoredTestHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // restore snapshot
  restoredTestHarness.initializeState(snapshot);

  // run again and verify that we see the other elements
  boolean readSecondBatchOfElements = false;
  try {
    restoredTestHarness.open();
    StreamSources.run(
        restoredSourceOperator,
        checkpointLock,
        new PartialCollector<>(emittedElements, secondBatchSize));
  } catch (SuccessException e) {
    // success
    readSecondBatchOfElements = true;
  }
  assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);

  // verify that both runs together produced numElements distinct elements; the message makes a
  // failure report the actual count instead of a bare AssertionError
  assertTrue(
      "Expected " + numElements + " distinct elements but saw " + emittedElements.size(),
      emittedElements.size() == numElements);
}
Usage of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter in the Apache Beam project.
Class UnboundedReadFromBoundedSourceTest, method testBoundedToUnboundedSourceAdapterCheckpoint.
/**
 * Reads {@code boundedSource} to the end through a {@link BoundedToUnboundedSourceAdapter} reader,
 * taking and finalizing a checkpoint after every 9th element, and then checks the result.
 *
 * <p>After the last element, the final checkpoint must report no residual elements (null or
 * empty), and the elements read must match {@code expectedElements} in count and, ignoring order,
 * in content.
 *
 * @param boundedSource the bounded source to wrap and read
 * @param expectedElements the elements the wrapped source is expected to produce
 * @throws Exception if creating, reading, checkpointing, or closing the reader fails
 */
private <T> void testBoundedToUnboundedSourceAdapterCheckpoint(BoundedSource<T> boundedSource, List<T> expectedElements) throws Exception {
  BoundedToUnboundedSourceAdapter<T> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);
  PipelineOptions options = PipelineOptionsFactory.create();
  List<T> actual = Lists.newArrayList();

  // try-with-resources: the reader is closed even if reading or an assertion throws
  try (BoundedToUnboundedSourceAdapter<T>.Reader reader =
      unboundedSource.createReader(options, null)) {
    for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) {
      actual.add(reader.getCurrent());
      // checkpoint every 9 elements
      if (actual.size() % 9 == 0) {
        Checkpoint<T> checkpoint = reader.getCheckpointMark();
        checkpoint.finalizeCheckpoint();
      }
    }

    Checkpoint<T> checkpointDone = reader.getCheckpointMark();
    // read the residual list once; wildcard type avoids depending on its element type
    List<?> residualElements = checkpointDone.getResidualElements();
    assertTrue(
        "Expected no residual elements after reading to the end, but got: " + residualElements,
        residualElements == null || residualElements.isEmpty());
  }

  assertEquals(expectedElements.size(), actual.size());
  assertEquals(Sets.newHashSet(expectedElements), Sets.newHashSet(actual));
}
Usage of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter in the Apache Beam project.
Class UnboundedReadFromBoundedSourceTest, method testInvokesSplitWithDefaultNumSplitsTooLarge.
/**
 * Asks an adapter around {@code CountingSource.upTo(1)} for 100 splits and expects exactly one
 * split back, which must not be equal to the adapter it was produced from.
 */
@Test
public void testInvokesSplitWithDefaultNumSplitsTooLarge() throws Exception {
  BoundedSource<Long> countingSource = CountingSource.upTo(1);
  UnboundedSource<Long, ?> adapter = new BoundedToUnboundedSourceAdapter<Long>(countingSource);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // the desired split count (100) is far larger than the source can honour
  List<?> producedSplits = adapter.split(100, pipelineOptions);

  assertEquals(1, producedSplits.size());
  Object onlySplit = producedSplits.get(0);
  assertNotEquals(onlySplit, adapter);
}
Usage of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter in the Apache Beam project.
Class UnboundedReadFromBoundedSourceTest, method testInvokingSplitProducesAtLeastOneSplit.
/**
 * Asks an adapter around {@code CountingSource.upTo(0)} for 100 splits and expects exactly one
 * split back, which must not be equal to the adapter it was produced from.
 */
@Test
public void testInvokingSplitProducesAtLeastOneSplit() throws Exception {
  BoundedSource<Long> countingSource = CountingSource.upTo(0);
  UnboundedSource<Long, ?> adapter = new BoundedToUnboundedSourceAdapter<Long>(countingSource);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // even with an upper bound of 0, splitting is expected to yield one split
  List<?> producedSplits = adapter.split(100, pipelineOptions);

  assertEquals(1, producedSplits.size());
  Object onlySplit = producedSplits.get(0);
  assertNotEquals(onlySplit, adapter);
}
Aggregations