Use of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint in the Apache Beam project.
From the class UnboundedReadFromBoundedSourceTest, method testBoundedToUnboundedSourceAdapterCheckpointRestart.
/**
 * Reads {@code boundedSource} through a {@code BoundedToUnboundedSourceAdapter}, checkpointing
 * and restarting the reader after every ninth element, then checks the collected elements
 * against {@code expectedElements}.
 *
 * <p>Each checkpoint is encoded and decoded with the adapter's checkpoint coder before it is
 * used to create the next reader, so the coder round trip is exercised as well.
 *
 * @param boundedSource the bounded source to wrap in the adapter
 * @param expectedElements the elements the wrapped source should yield; compared by count and as
 *     a set, so ordering is not verified
 * @throws Exception if reading, checkpoint encoding/decoding, or closing a reader fails
 */
private <T> void testBoundedToUnboundedSourceAdapterCheckpointRestart(
    BoundedSource<T> boundedSource, List<T> expectedElements) throws Exception {
  BoundedToUnboundedSourceAdapter<T> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);
  PipelineOptions options = PipelineOptionsFactory.create();
  // The first reader is created without a checkpoint.
  BoundedToUnboundedSourceAdapter<T>.Reader reader = unboundedSource.createReader(options, null);
  List<T> actual = Lists.newArrayList();
  for (boolean hasNext = reader.start(); hasNext; ) {
    actual.add(reader.getCurrent());
    // checkpoint every 9 elements
    if (actual.size() % 9 == 0) {
      Checkpoint<T> checkpoint = reader.getCheckpointMark();
      // Round-trip the checkpoint through its coder; the restarted reader uses the decoded copy.
      Coder<Checkpoint<T>> checkpointCoder = unboundedSource.getCheckpointMarkCoder();
      Checkpoint<T> decodedCheckpoint =
          CoderUtils.decodeFromByteArray(
              checkpointCoder, CoderUtils.encodeToByteArray(checkpointCoder, checkpoint));
      reader.close();
      checkpoint.finalizeCheckpoint();
      // Continue from the decoded checkpoint with a brand-new reader.
      reader = unboundedSource.createReader(options, decodedCheckpoint);
      hasNext = reader.start();
    } else {
      hasNext = reader.advance();
    }
  }
  // The loop closes every intermediate reader before restarting; the last reader must be
  // closed too, otherwise it is leaked when the method returns.
  final Checkpoint<T> checkpointDone;
  try {
    checkpointDone = reader.getCheckpointMark();
  } finally {
    reader.close();
  }
  // Once everything has been read, the final checkpoint must carry no residual elements.
  assertTrue(
      checkpointDone.getResidualElements() == null
          || checkpointDone.getResidualElements().isEmpty());
  assertEquals(expectedElements.size(), actual.size());
  assertEquals(Sets.newHashSet(expectedElements), Sets.newHashSet(actual));
}
Use of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint in the Apache Beam project.
From the class UnboundedReadFromBoundedSourceTest, method testReadFromCheckpointBeforeStart.
/**
 * Expects a {@link NoSuchElementException} when {@code getCurrent()} is called on a reader that
 * was created from a checkpoint but has not been started.
 */
@Test
public void testReadFromCheckpointBeforeStart() throws Exception {
  thrown.expect(NoSuchElementException.class);

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  BoundedSource<Long> bounded = CountingSource.upTo(100);
  BoundedToUnboundedSourceAdapter<Long> adapter = new BoundedToUnboundedSourceAdapter<>(bounded);

  // Hand-built checkpoint: a single timestamped element plus the counting source itself.
  List<TimestampedValue<Long>> checkpointElements =
      ImmutableList.of(TimestampedValue.of(1L, new Instant(1L)));
  Checkpoint<Long> handMadeCheckpoint = new Checkpoint<>(checkpointElements, bounded);

  // Deliberately skip start(): reading the current element right away is the case under test.
  BoundedToUnboundedSourceAdapter<Long>.Reader unstartedReader =
      adapter.createReader(pipelineOptions, handMadeCheckpoint);
  unstartedReader.getCurrent();
}
Use of org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint in the Apache Beam project.
From the class BoundedSourceRestoreTest, method testRestore.
/**
 * Reads a first batch of elements from a bounded source wrapped as an unbounded Flink source,
 * takes a snapshot, restores that snapshot into a freshly built operator, and expects the
 * restored operator to emit the remaining elements so that all of them are seen in total.
 */
@Test
public void testRestore() throws Exception {
  final int totalElements = 102;
  final int firstBatch = 23;
  final int remainingBatch = totalElements - firstBatch;
  final Set<Long> seen = new HashSet<>();
  final Object lock = new Object();
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // bounded source wrapped as unbounded source
  BoundedToUnboundedSourceAdapter<Long> adapter =
      new BoundedToUnboundedSourceAdapter<>(CountingSource.upTo(totalElements));
  UnboundedSourceWrapper<Long, Checkpoint<Long>> wrapper =
      new UnboundedSourceWrapper<>("stepName", pipelineOptions, adapter, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      operator = new StreamSource<>(wrapper);
  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> harness =
      new AbstractStreamOperatorTestHarness<>(
          operator, /* max parallelism */ numTasks, /* parallelism */ numTasks, 0);
  harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // read the first batch (firstBatch out of totalElements); the collector signals that it has
  // received enough elements by throwing SuccessException
  boolean firstBatchRead;
  try {
    harness.open();
    StreamSources.run(operator, lock, new PartialCollector<>(seen, firstBatch));
    // returning normally means the collector never signalled success
    firstBatchRead = false;
  } catch (SuccessException expected) {
    firstBatchRead = true;
  }
  assertTrue("Did not successfully read first batch of elements.", firstBatchRead);

  // draw a snapshot
  OperatorSubtaskState snapshot = harness.snapshot(0, 0);

  // finalize checkpoint
  final ArrayList<Integer> finalizeTracker = new ArrayList<>();
  TestCountingSource.setFinalizeTracker(finalizeTracker);
  harness.notifyOfCompletedCheckpoint(0);

  // create a completely new source but restore from the snapshot
  BoundedToUnboundedSourceAdapter<Long> restoredAdapter =
      new BoundedToUnboundedSourceAdapter<>(CountingSource.upTo(totalElements));
  UnboundedSourceWrapper<Long, Checkpoint<Long>> restoredWrapper =
      new UnboundedSourceWrapper<>("stepName", pipelineOptions, restoredAdapter, numSplits);
  StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>>
      restoredOperator = new StreamSource<>(restoredWrapper);

  // set parallelism to 1 to ensure that our testing operator gets all checkpointed state
  AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> restoredHarness =
      new AbstractStreamOperatorTestHarness<>(
          restoredOperator, /* max parallelism */ numTasks, /* parallelism */ 1, 0);
  restoredHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

  // restore snapshot
  restoredHarness.initializeState(snapshot);

  // run again and verify that we see the other elements
  boolean remainingBatchRead;
  try {
    restoredHarness.open();
    StreamSources.run(restoredOperator, lock, new PartialCollector<>(seen, remainingBatch));
    remainingBatchRead = false;
  } catch (SuccessException expected) {
    remainingBatchRead = true;
  }
  assertTrue("Did not successfully read second batch of elements.", remainingBatchRead);

  // verify that, across both runs, every one of the totalElements distinct values was emitted
  assertTrue(totalElements == seen.size());
}
Aggregations