
Example 26 with OperatorStateHandles

Use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

The class BucketingSinkTest, method testScalingDown.

@Test
public void testScalingDown() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 3, 0, 100);
    testHarness1.setup();
    testHarness1.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 3, 1, 100);
    testHarness2.setup();
    testHarness2.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness3 = createRescalingTestSink(outDir, 3, 2, 100);
    testHarness3.setup();
    testHarness3.open();
    testHarness1.processElement(new StreamRecord<>("test1", 0L));
    checkFs(outDir, 1, 0, 0, 0);
    testHarness2.processElement(new StreamRecord<>("test2", 0L));
    checkFs(outDir, 2, 0, 0, 0);
    testHarness3.processElement(new StreamRecord<>("test3", 0L));
    testHarness3.processElement(new StreamRecord<>("test4", 0L));
    checkFs(outDir, 4, 0, 0, 0);
    // we intentionally snapshot them in reverse order so that the states get shuffled
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness3.snapshot(0, 0), testHarness1.snapshot(0, 0), testHarness2.snapshot(0, 0));
    testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.initializeState(mergedSnapshot);
    testHarness1.open();
    checkFs(outDir, 1, 0, 3, 3);
    testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
    testHarness2.setup();
    testHarness2.initializeState(mergedSnapshot);
    testHarness2.open();
    checkFs(outDir, 0, 0, 4, 4);
}
Also used: OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
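The createRescalingTestSink helper is private to BucketingSinkTest and not shown on this page. A minimal sketch of what it plausibly looks like, assuming a BucketingSink with its inactivity check interval and inactivity threshold both set to the given interval, wrapped in a harness pinned to one subtask; the part prefix and the suffix strings here are illustrative placeholders, not the verbatim test constants:

import java.io.File;
import org.apache.flink.streaming.api.operators.StreamSink;
import org.apache.flink.streaming.connectors.fs.StringWriter;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;

// Plausible reconstruction, not the verbatim helper: a BucketingSink whose
// buckets go inactive after `inactivityInterval` ms, wrapped in a harness
// that emulates subtask `taskIdx` of `totalParallelism` parallel instances.
private OneInputStreamOperatorTestHarness<String, Object> createRescalingTestSink(
        File outDir, int totalParallelism, int taskIdx, long inactivityInterval) throws Exception {
    BucketingSink<String> sink = new BucketingSink<String>(outDir.getAbsolutePath())
            .setWriter(new StringWriter<String>())
            .setInactiveBucketCheckInterval(inactivityInterval)
            .setInactiveBucketThreshold(inactivityInterval)
            .setPartPrefix("part")
            .setInProgressSuffix(".in-progress")
            .setPendingSuffix(".pending")
            .setValidLengthSuffix(".valid-length");
    // harness constructor: (operator, maxParallelism, parallelism, subtaskIndex)
    return new OneInputStreamOperatorTestHarness<>(
            new StreamSink<>(sink), 10, totalParallelism, taskIdx);
}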

Example 27 with OperatorStateHandles

Use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

The class BucketingSinkTest, method testBucketStateTransitions.

@Test
public void testBucketStateTransitions() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness = createRescalingTestSink(outDir, 1, 0, 100);
    testHarness.setup();
    testHarness.open();
    testHarness.setProcessingTime(0L);
    testHarness.processElement(new StreamRecord<>("test1", 1L));
    testHarness.processElement(new StreamRecord<>("test2", 1L));
    checkFs(outDir, 2, 0, 0, 0);
    // advance processing time past the 100 ms inactivity threshold
    testHarness.setProcessingTime(101L);
    checkFs(outDir, 0, 2, 0, 0);
    testHarness.processElement(new StreamRecord<>("test3", 1L));
    checkFs(outDir, 1, 2, 0, 0);
    testHarness.snapshot(0, 0);
    checkFs(outDir, 1, 2, 0, 0);
    testHarness.notifyOfCompletedCheckpoint(0);
    checkFs(outDir, 1, 0, 2, 0);
    OperatorStateHandles snapshot = testHarness.snapshot(1, 0);
    testHarness.close();
    checkFs(outDir, 0, 1, 2, 0);
    testHarness = createRescalingTestSink(outDir, 1, 0, 100);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    checkFs(outDir, 0, 0, 3, 1);
    snapshot = testHarness.snapshot(2, 0);
    testHarness.processElement(new StreamRecord<>("test4", 10));
    checkFs(outDir, 1, 0, 3, 1);
    testHarness = createRescalingTestSink(outDir, 1, 0, 100);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    // the in-progress file remains, as we do not clean up at this point
    checkFs(outDir, 1, 0, 3, 1);
    testHarness.close();
    // at close it is not moved to final because it is not part of the
    // current task's state; it was just a leftover that was never cleaned up.
    checkFs(outDir, 1, 0, 3, 1);
}
Also used: OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
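checkFs is likewise a private helper of the test class. Its four counters evidently assert how many in-progress, pending, completed and valid-length files exist under outDir; a sketch under that assumption, with suffixes mirroring the hypothetical sink configuration above:

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.junit.Assert;

// Sketch only: counts files under outDir by lifecycle suffix and compares
// against the expected numbers. The suffix strings are assumptions.
private void checkFs(File outDir, int inProgress, int pending, int completed, int validLength) throws IOException {
    int inProg = 0, pend = 0, compl = 0, valid = 0;
    for (File file : FileUtils.listFiles(outDir, null, true)) {
        if (file.getName().endsWith(".in-progress")) {
            inProg++;
        } else if (file.getName().endsWith(".pending")) {
            pend++;
        } else if (file.getName().endsWith(".valid-length")) {
            valid++;
        } else {
            compl++;
        }
    }
    Assert.assertEquals(inProgress, inProg);
    Assert.assertEquals(pending, pend);
    Assert.assertEquals(completed, compl);
    Assert.assertEquals(validLength, valid);
}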

Example 28 with OperatorStateHandles

Use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

The class BucketingSinkTest, method testSameParallelismWithShufflingStates.

@Test
public void testSameParallelismWithShufflingStates() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
    testHarness2.setup();
    testHarness2.open();
    testHarness1.processElement(new StreamRecord<>("test1", 0L));
    checkFs(outDir, 1, 0, 0, 0);
    testHarness2.processElement(new StreamRecord<>("test2", 0L));
    checkFs(outDir, 2, 0, 0, 0);
    // we intentionally snapshot them in reverse order so that the states get shuffled
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    checkFs(outDir, 2, 0, 0, 0);
    // this element is not part of any checkpoint, so its file could be cleaned up (although we do not do so)
    testHarness2.processElement(new StreamRecord<>("test3", 0L));
    checkFs(outDir, 3, 0, 0, 0);
    testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.initializeState(mergedSnapshot);
    testHarness1.open();
    // one in-progress file is the one assigned to the next instance;
    // the other belongs to test3 and is simply never cleaned up
    checkFs(outDir, 2, 0, 1, 1);
    testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
    testHarness2.setup();
    testHarness2.initializeState(mergedSnapshot);
    testHarness2.open();
    checkFs(outDir, 1, 0, 2, 2);
    testHarness1.close();
    testHarness2.close();
    // the one remaining in-progress file could be discarded.
    checkFs(outDir, 1, 0, 2, 2);
}
Also used: OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
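Stripped of the assertions, examples 26 and 28 share one choreography: snapshot every old instance, merge the handles with repackageState, then hand the merged bundle to each new instance. A condensed sketch, where harnessA, harnessB, newParallelism and subtaskIdx are placeholders:

// Condensed sketch of the rescale-and-restore pattern used above.
OperatorStateHandles merged = AbstractStreamOperatorTestHarness.repackageState(
        harnessA.snapshot(0, 0),
        harnessB.snapshot(0, 0));

// Each instance at the new parallelism receives the full merged bundle;
// initializeState extracts the share belonging to its subtask index.
OneInputStreamOperatorTestHarness<String, Object> restored =
        createRescalingTestSink(outDir, newParallelism, subtaskIdx, 100);
restored.setup();
restored.initializeState(merged);
restored.open();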

Example 29 with OperatorStateHandles

Use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

The class ContinuousFileProcessingTest, method testReaderSnapshotRestore.

@Test
public void testReaderSnapshotRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(testBasePath));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance = new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.open();
    // create some state in the reader
    initTestInstance.processElement(new StreamRecord<>(split1));
    initTestInstance.processElement(new StreamRecord<>(split2));
    initTestInstance.processElement(new StreamRecord<>(split3));
    initTestInstance.processElement(new StreamRecord<>(split4));
    // take a snapshot of the operator's state. This will be used
    // to initialize another reader and compare the results of the
    // two operators.
    final OperatorStateHandles snapshot;
    synchronized (initTestInstance.getCheckpointLock()) {
        snapshot = initTestInstance.snapshot(0L, 0L);
    }
    ContinuousFileReaderOperator<FileInputSplit> restoredReader = new ContinuousFileReaderOperator<>(new BlockingFileInputFormat(latch, new Path(testBasePath)));
    restoredReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> restoredTestInstance = new OneInputStreamOperatorTestHarness<>(restoredReader);
    restoredTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    restoredTestInstance.initializeState(snapshot);
    restoredTestInstance.open();
    // now let computation start
    latch.trigger();
    synchronized (initTestInstance.getCheckpointLock()) {
        initTestInstance.close();
    }
    synchronized (restoredTestInstance.getCheckpointLock()) {
        restoredTestInstance.close();
    }
    FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);
    // verify that both outputs contain the expected splits and that
    // the restored operator produced exactly the same output.
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
    Assert.assertArrayEquals(initTestInstance.getOutput().toArray(), restoredTestInstance.getOutput().toArray());
}
Also used: Path (org.apache.flink.core.fs.Path), StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness), FileInputSplit (org.apache.flink.core.fs.FileInputSplit), OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), ContinuousFileReaderOperator (org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator), Test (org.junit.Test)
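createSplitFromTimestampedSplit is another unshown helper; presumably it just strips the modification-time metadata so the splits can be compared against the reader's plain FileInputSplit output, along these lines:

import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit;

// Presumed helper: rebuilds the plain FileInputSplit carried by the
// timestamped split, discarding the modification time used for ordering.
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    return new FileInputSplit(
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());
}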

Example 30 with OperatorStateHandles

Use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

The class CEPOperatorTest, method testKeyedCEPOperatorCheckpointingWithRocksDB.

@Test
public void testKeyedCEPOperatorCheckpointingWithRocksDB() throws Exception {
    String rocksDbPath = tempFolder.newFolder().getAbsolutePath();
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(new MemoryStateBackend());
    rocksDBStateBackend.setDbStoragePath(rocksDbPath);
    OneInputStreamOperatorTestHarness<Event, Map<String, Event>> harness = getCepTestHarness(false);
    harness.setStateBackend(rocksDBStateBackend);
    harness.open();
    Event startEvent = new Event(42, "start", 1.0);
    SubEvent middleEvent = new SubEvent(42, "foo", 1.0, 10.0);
    Event endEvent = new Event(42, "end", 1.0);
    harness.processElement(new StreamRecord<>(startEvent, 1L));
    harness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 2L));
    // simulate snapshot/restore with some elements in internal sorting queue
    OperatorStateHandles snapshot = harness.snapshot(0L, 0L);
    harness.close();
    harness = getCepTestHarness(false);
    rocksDBStateBackend = new RocksDBStateBackend(new MemoryStateBackend());
    rocksDBStateBackend.setDbStoragePath(rocksDbPath);
    harness.setStateBackend(rocksDBStateBackend);
    harness.setup();
    harness.initializeState(snapshot);
    harness.open();
    harness.processWatermark(new Watermark(Long.MIN_VALUE));
    harness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3L));
    // if element timestamps are not correctly checkpointed/restored, this will lead to
    // a pruning-time underflow exception in the NFA
    harness.processWatermark(new Watermark(2L));
    // simulate snapshot/restore with empty element queue but NFA state
    OperatorStateHandles snapshot2 = harness.snapshot(1L, 1L);
    harness.close();
    harness = getCepTestHarness(false);
    rocksDBStateBackend = new RocksDBStateBackend(new MemoryStateBackend());
    rocksDBStateBackend.setDbStoragePath(rocksDbPath);
    harness.setStateBackend(rocksDBStateBackend);
    harness.setup();
    harness.initializeState(snapshot2);
    harness.open();
    harness.processElement(new StreamRecord<Event>(middleEvent, 3L));
    harness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4L));
    harness.processElement(new StreamRecord<>(endEvent, 5L));
    harness.processWatermark(new Watermark(Long.MAX_VALUE));
    // get and verify the output
    Queue<Object> result = harness.getOutput();
    assertEquals(2, result.size());
    verifyPattern(result.poll(), startEvent, middleEvent, endEvent);
    verifyWatermark(result.poll(), Long.MAX_VALUE);
    harness.close();
}
Also used: RocksDBStateBackend (org.apache.flink.contrib.streaming.state.RocksDBStateBackend), SubEvent (org.apache.flink.cep.SubEvent), MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend), OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), Event (org.apache.flink.cep.Event), HashMap (java.util.HashMap), Map (java.util.Map), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Test (org.junit.Test)
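Setting aside the RocksDB specifics, every example on this page cycles through the same harness lifecycle. As a sketch, where recreateHarness is a hypothetical factory returning a fresh harness for the same operator, and checkpointId/timestamp are placeholders:

// The recurring snapshot/restore cycle (sketch).
OperatorStateHandles snapshot = harness.snapshot(checkpointId, timestamp);
harness.close();

harness = recreateHarness();       // hypothetical factory method
harness.setup();
harness.initializeState(snapshot);
harness.open();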

Aggregations

OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles): 51 uses
Test (org.junit.Test): 45 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 17 uses
Watermark (org.apache.flink.streaming.api.watermark.Watermark): 16 uses
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 14 uses
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 12 uses
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 9 uses
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 8 uses
File (java.io.File): 7 uses
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 7 uses
AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness): 7 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 7 uses
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 6 uses
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 6 uses
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 6 uses
ArrayList (java.util.ArrayList): 5 uses
Map (java.util.Map): 5 uses
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 5 uses
OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 5 uses
Event (org.apache.flink.cep.Event): 4 uses