Example usage of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in the Apache Flink project: class BucketingSinkTest, method testSameParallelismWithShufflingStates.
// Verifies that state snapshots taken from two parallel BucketingSink subtasks can be
// repackaged in shuffled (reverse) order and redistributed at the SAME parallelism (2)
// without losing bucket state: each restored subtask must pick up an in-progress file.
@Test
public void testSameParallelismWithShufflingStates() throws Exception {
final File outDir = tempFolder.newFolder();
// Subtask 0 of 2; last argument is presumably the inactive-bucket/rollover
// threshold used by createRescalingTestSink -- TODO confirm against the helper.
OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
testHarness1.setup();
testHarness1.open();
// Subtask 1 of 2.
OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
testHarness2.setup();
testHarness2.open();
testHarness1.processElement(new StreamRecord<>("test1", 0L));
// NOTE(review): checkFs arguments appear to be counts of
// (in-progress, pending, completed/valid, valid-length) files under outDir --
// confirm against the checkFs helper definition.
checkFs(outDir, 1, 0, 0, 0);
testHarness2.processElement(new StreamRecord<>("test2", 0L));
checkFs(outDir, 2, 0, 0, 0);
// intentionally we snapshot them in the reverse order so that the states are shuffled
OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
checkFs(outDir, 2, 0, 0, 0);
// this will not be included in any checkpoint so it can be cleaned up (although we do not)
testHarness2.processElement(new StreamRecord<>("test3", 0L));
checkFs(outDir, 3, 0, 0, 0);
// Recreate subtask 0 and restore it from the merged (shuffled) snapshot.
testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
testHarness1.setup();
testHarness1.initializeState(mergedSnapshot);
testHarness1.open();
// the one in-progress will be the one assigned to the next instance,
// the other is the test3 which is just not cleaned up
checkFs(outDir, 2, 0, 1, 1);
// Recreate subtask 1 and restore it from the same merged snapshot.
testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
testHarness2.setup();
testHarness2.initializeState(mergedSnapshot);
testHarness2.open();
checkFs(outDir, 1, 0, 2, 2);
testHarness1.close();
testHarness2.close();
// the 1 in-progress can be discarded.
checkFs(outDir, 1, 0, 2, 2);
}
Example usage of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in the Apache Flink project: class ContinuousFileProcessingTest, method testReaderSnapshotRestore.
@Test
public void testReaderSnapshotRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    // Four timestamped splits; the reader is expected to buffer them while the
    // blocking input format holds processing back behind the latch.
    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

    final OneShotLatch startLatch = new OneShotLatch();

    BlockingFileInputFormat blockingFormat = new BlockingFileInputFormat(startLatch, new Path(testBasePath));
    TypeInformation<FileInputSplit> splitTypeInfo = TypeExtractor.getInputFormatTypes(blockingFormat);

    // First reader: accumulates the splits as state, then gets snapshotted.
    ContinuousFileReaderOperator<FileInputSplit> initialReader = new ContinuousFileReaderOperator<>(blockingFormat);
    initialReader.setOutputType(splitTypeInfo, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initialHarness = new OneInputStreamOperatorTestHarness<>(initialReader);
    initialHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initialHarness.open();

    // Feed the splits so the reader builds up state before the snapshot.
    initialHarness.processElement(new StreamRecord<>(split1));
    initialHarness.processElement(new StreamRecord<>(split2));
    initialHarness.processElement(new StreamRecord<>(split3));
    initialHarness.processElement(new StreamRecord<>(split4));

    // Snapshot under the checkpoint lock; this state seeds the second reader so
    // the outputs of both operators can be compared at the end.
    final OperatorStateHandles snapshot;
    synchronized (initialHarness.getCheckpointLock()) {
        snapshot = initialHarness.snapshot(0L, 0L);
    }

    // Second reader: restored purely from the snapshot, never fed directly.
    ContinuousFileReaderOperator<FileInputSplit> restoredReader = new ContinuousFileReaderOperator<>(new BlockingFileInputFormat(startLatch, new Path(testBasePath)));
    restoredReader.setOutputType(splitTypeInfo, new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> restoredHarness = new OneInputStreamOperatorTestHarness<>(restoredReader);
    restoredHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    restoredHarness.initializeState(snapshot);
    restoredHarness.open();

    // Release the blocking format so both readers can drain their state.
    startLatch.trigger();

    synchronized (initialHarness.getCheckpointLock()) {
        initialHarness.close();
    }
    synchronized (restoredHarness.getCheckpointLock()) {
        restoredHarness.close();
    }

    FileInputSplit expected1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit expected2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit expected3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit expected4 = createSplitFromTimestampedSplit(split4);

    // The original reader must have emitted every split, and the restored reader
    // must produce exactly the same output in the same order.
    Assert.assertTrue(initialHarness.getOutput().contains(new StreamRecord<>(expected1)));
    Assert.assertTrue(initialHarness.getOutput().contains(new StreamRecord<>(expected2)));
    Assert.assertTrue(initialHarness.getOutput().contains(new StreamRecord<>(expected3)));
    Assert.assertTrue(initialHarness.getOutput().contains(new StreamRecord<>(expected4)));
    Assert.assertArrayEquals(initialHarness.getOutput().toArray(), restoredHarness.getOutput().toArray());
}
Example usage of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in the Apache Beam project: class DedupingOperatorTest, method testDeduping.
@Test
public void testDeduping() throws Exception {
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<ValueWithRecordId<String>>, WindowedValue<String>> dedupHarness = getDebupingHarness();
    dedupHarness.open();

    String firstId = "key1";
    String secondId = "key2";

    // Emit key1 twice and key2 once; the duplicate key1 must be dropped.
    dedupHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(firstId, firstId.getBytes()))));
    dedupHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(secondId, secondId.getBytes()))));
    dedupHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(firstId, firstId.getBytes()))));

    assertThat(this.<String>stripStreamRecordFromWindowedValue(dedupHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(firstId), WindowedValue.valueInGlobalWindow(secondId)));

    // Snapshot the deduping state, tear the operator down, and restore into a
    // fresh harness -- already-seen ids must still be remembered.
    OperatorStateHandles snapshot = dedupHarness.snapshot(0L, 0L);
    dedupHarness.close();

    dedupHarness = getDebupingHarness();
    dedupHarness.setup();
    dedupHarness.initializeState(snapshot);
    dedupHarness.open();

    String thirdId = "key3";

    // key2 was seen before the restore and must be suppressed; key3 is new.
    dedupHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(secondId, secondId.getBytes()))));
    dedupHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(thirdId, thirdId.getBytes()))));

    assertThat(this.<String>stripStreamRecordFromWindowedValue(dedupHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(thirdId)));

    dedupHarness.close();
}
Example usage of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in the Apache Flink project: class FromElementsFunctionTest, method testCheckpointAndRestore.
/**
 * Checkpoints a running {@code FromElementsFunction} mid-stream and verifies that a
 * second source restored from that checkpoint emits exactly the remaining elements,
 * so that the checkpointed prefix plus the recovery run reproduces the full data set.
 */
@Test
public void testCheckpointAndRestore() throws Exception {
    // FIX: the original wrapped the whole body in try/catch with
    // fail(e.getMessage()), which discards the stack trace and can fail with a
    // null message. Declaring "throws Exception" lets the test framework report
    // the full failure instead.
    final int NUM_ELEMENTS = 10000;
    List<Integer> data = new ArrayList<Integer>(NUM_ELEMENTS);
    List<Integer> result = new ArrayList<Integer>(NUM_ELEMENTS);
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        data.add(i);
    }

    final FromElementsFunction<Integer> source = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> src = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<Integer> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();

    // Context emits into `result` with a small delay (2 ms) per element so the
    // checkpoint below lands mid-stream.
    final SourceFunction.SourceContext<Integer> ctx = new ListSourceContext<Integer>(result, 2L);

    // Slot for any failure thrown on the runner thread; checked after join.
    final Throwable[] error = new Throwable[1];

    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                source.run(ctx);
            } catch (Throwable t) {
                error[0] = t;
            }
        }
    };
    runner.start();

    // Give the source some time to emit a partial prefix of the data.
    Thread.sleep(1000);

    // Take the checkpoint under the checkpoint lock so the snapshot and the
    // captured prefix in `checkpointData` are consistent with each other.
    List<Integer> checkpointData = new ArrayList<>(NUM_ELEMENTS);
    final OperatorStateHandles handles;
    synchronized (ctx.getCheckpointLock()) {
        handles = testHarness.snapshot(566, System.currentTimeMillis());
        checkpointData.addAll(result);
    }

    // cancel the source
    source.cancel();
    runner.join();

    // Surface any asynchronous failure from the runner thread.
    if (error[0] != null) {
        System.err.println("Error in asynchronous source runner");
        error[0].printStackTrace();
        fail("Error in asynchronous source runner");
    }

    // Restore a fresh copy of the source from the checkpoint and let it run to
    // completion, appending to the checkpointed prefix.
    final FromElementsFunction<Integer> sourceCopy = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> srcCopy = new StreamSource<>(sourceCopy);
    AbstractStreamOperatorTestHarness<Integer> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.setup();
    testHarnessCopy.initializeState(handles);
    testHarnessCopy.open();

    // recovery run
    SourceFunction.SourceContext<Integer> newCtx = new ListSourceContext<>(checkpointData);
    sourceCopy.run(newCtx);

    // Prefix (before checkpoint) + recovery output must equal the original data.
    assertEquals(data, checkpointData);
}
Example usage of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in the Apache Flink project: class StatefulSequenceSourceTest, method testCheckpointRestore.
/**
 * Runs two parallel {@code StatefulSequenceSource} subtasks, snapshots both mid-run,
 * repackages their state into one handle, and restores it into a single subtask
 * (parallelism 2 -> 1). Verifies exactly-once semantics: across all three task
 * outputs every element in [initElement, maxElement] appears exactly once.
 */
@Test
public void testCheckpointRestore() throws Exception {
    final int initElement = 0;
    final int maxElement = 100;

    final Set<Long> expectedOutput = new HashSet<>();
    for (long i = initElement; i <= maxElement; i++) {
        expectedOutput.add(i);
    }

    // Per-task output, keyed by the task name passed to BlockingSourceContext.
    final ConcurrentHashMap<String, List<Long>> outputCollector = new ConcurrentHashMap<>();

    // triggerN fires once task N has emitted its quota; waitN blocks task N until released.
    final OneShotLatch latchToTrigger1 = new OneShotLatch();
    final OneShotLatch latchToWait1 = new OneShotLatch();
    final OneShotLatch latchToTrigger2 = new OneShotLatch();
    final OneShotLatch latchToWait2 = new OneShotLatch();

    // Subtask 0 of 2.
    final StatefulSequenceSource source1 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src1 = new StreamSource<>(source1);
    final AbstractStreamOperatorTestHarness<Long> testHarness1 = new AbstractStreamOperatorTestHarness<>(src1, 2, 2, 0);
    testHarness1.open();

    // Subtask 1 of 2.
    final StatefulSequenceSource source2 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src2 = new StreamSource<>(source2);
    final AbstractStreamOperatorTestHarness<Long> testHarness2 = new AbstractStreamOperatorTestHarness<>(src2, 2, 2, 1);
    testHarness2.open();

    // One slot per runner thread; inspected implicitly via the final counts.
    final Throwable[] error = new Throwable[3];

    // run the source asynchronously
    Thread runner1 = new Thread() {
        @Override
        public void run() {
            try {
                source1.run(new BlockingSourceContext("1", latchToTrigger1, latchToWait1, outputCollector, 21));
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };

    // run the source asynchronously
    Thread runner2 = new Thread() {
        @Override
        public void run() {
            try {
                source2.run(new BlockingSourceContext("2", latchToTrigger2, latchToWait2, outputCollector, 32));
            } catch (Throwable t) {
                t.printStackTrace();
                error[1] = t;
            }
        }
    };
    runner1.start();
    runner2.start();

    // Wait until both sources have emitted their pre-checkpoint quota and are blocked.
    if (!latchToTrigger1.isTriggered()) {
        latchToTrigger1.await();
    }
    if (!latchToTrigger2.isTriggered()) {
        latchToTrigger2.await();
    }

    // Merge both subtasks' state so it can be restored at parallelism 1.
    OperatorStateHandles snapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness1.snapshot(0L, 0L), testHarness2.snapshot(0L, 0L));

    // Single restored subtask (parallelism 1) picking up the merged state.
    final StatefulSequenceSource source3 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src3 = new StreamSource<>(source3);
    final AbstractStreamOperatorTestHarness<Long> testHarness3 = new AbstractStreamOperatorTestHarness<>(src3, 2, 1, 0);
    testHarness3.setup();
    testHarness3.initializeState(snapshot);
    testHarness3.open();

    final OneShotLatch latchToTrigger3 = new OneShotLatch();
    final OneShotLatch latchToWait3 = new OneShotLatch();
    // Task 3 never needs to block -- release it up front.
    latchToWait3.trigger();

    // run the source asynchronously
    Thread runner3 = new Thread() {
        @Override
        public void run() {
            try {
                source3.run(new BlockingSourceContext("3", latchToTrigger3, latchToWait3, outputCollector, 3));
            } catch (Throwable t) {
                t.printStackTrace();
                error[2] = t;
            }
        }
    };
    runner3.start();
    runner3.join();

    // we have 3 tasks.
    Assert.assertEquals(3, outputCollector.size());

    // test for at-most-once
    Set<Long> dedupRes = new HashSet<>(Math.abs(maxElement - initElement) + 1);
    for (Map.Entry<String, List<Long>> elementsPerTask : outputCollector.entrySet()) {
        // FIX: use the entry's value directly instead of re-looking it up in the
        // map (the original did outputCollector.get(elementsPerTask.getKey())).
        List<Long> elements = elementsPerTask.getValue();
        // this tests the correctness of the latches in the test
        Assert.assertFalse(elements.isEmpty());
        for (Long elem : elements) {
            if (!dedupRes.add(elem)) {
                Assert.fail("Duplicate entry: " + elem);
            }
            if (!expectedOutput.contains(elem)) {
                Assert.fail("Unexpected element: " + elem);
            }
        }
    }

    // test for exactly-once
    Assert.assertEquals(Math.abs(initElement - maxElement) + 1, dedupRes.size());

    // Release the two blocked sources and wait for everybody to finish.
    latchToWait1.trigger();
    latchToWait2.trigger();
    runner1.join();
    runner2.join();
}
Aggregations