
Example 21 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

the class BucketingSinkTest method testScalingUp.

@Test
public void testScalingUp() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness2.setup();
    testHarness2.open();
    testHarness1.processElement(new StreamRecord<>("test1", 1L));
    testHarness1.processElement(new StreamRecord<>("test2", 1L));
    checkFs(outDir, 2, 0, 0, 0);
    testHarness2.processElement(new StreamRecord<>("test3", 1L));
    testHarness2.processElement(new StreamRecord<>("test4", 1L));
    testHarness2.processElement(new StreamRecord<>("test5", 1L));
    checkFs(outDir, 5, 0, 0, 0);
    // we intentionally snapshot them in reverse order so that the states are shuffled
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    testHarness1 = createRescalingTestSink(outDir, 3, 0, 100);
    testHarness1.setup();
    testHarness1.initializeState(mergedSnapshot);
    testHarness1.open();
    checkFs(outDir, 2, 0, 3, 3);
    testHarness2 = createRescalingTestSink(outDir, 3, 1, 100);
    testHarness2.setup();
    testHarness2.initializeState(mergedSnapshot);
    testHarness2.open();
    checkFs(outDir, 0, 0, 5, 5);
    OneInputStreamOperatorTestHarness<String, Object> testHarness3 = createRescalingTestSink(outDir, 3, 2, 100);
    testHarness3.setup();
    testHarness3.initializeState(mergedSnapshot);
    testHarness3.open();
    checkFs(outDir, 0, 0, 5, 5);
    testHarness1.processElement(new StreamRecord<>("test6", 0));
    testHarness2.processElement(new StreamRecord<>("test6", 0));
    testHarness3.processElement(new StreamRecord<>("test6", 0));
    checkFs(outDir, 3, 0, 5, 5);
    testHarness1.snapshot(1, 0);
    testHarness2.snapshot(1, 0);
    testHarness3.snapshot(1, 0);
    testHarness1.close();
    testHarness2.close();
    testHarness3.close();
    checkFs(outDir, 0, 3, 5, 5);
}
Also used : OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
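The createRescalingTestSink(...) and checkFs(...) helpers are referenced throughout these excerpts but not shown. As a reading aid, here is a hypothetical sketch of checkFs: the four expected counts appear to be in-progress, pending, completed, and valid-length files, assuming the sink's default .in-progress/.pending/.valid-length suffixes (the real helper in BucketingSinkTest may differ in details such as prefix or checksum-file handling):

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

static void checkFs(File outDir, int inprogress, int pending, int completed, int valid) throws IOException {
    List<Path> files;
    try (Stream<Path> walk = Files.walk(outDir.toPath())) {
        files = walk.filter(Files::isRegularFile).collect(Collectors.toList());
    }
    int inProg = 0, pend = 0, compl = 0, val = 0;
    for (Path p : files) {
        String name = p.getFileName().toString();
        if (name.endsWith(".crc")) {
            continue; // ignore checksum files some file systems create
        }
        if (name.endsWith(".in-progress")) {
            inProg++;
        } else if (name.endsWith(".pending")) {
            pend++;
        } else if (name.endsWith(".valid-length")) {
            val++;
        } else {
            compl++; // finalized part file
        }
    }
    assertEquals(inprogress, inProg);
    assertEquals(pending, pend);
    assertEquals(completed, compl);
    assertEquals(valid, val);
}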

Example 22 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

the class RMQSourceTest method testCheckpointing.

@Test
public void testCheckpointing() throws Exception {
    source.autoAck = false;
    StreamSource<String, RMQSource<String>> src = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    sourceThread.start();
    Thread.sleep(5);
    final Random random = new Random(System.currentTimeMillis());
    int numSnapshots = 50;
    long previousSnapshotId;
    long lastSnapshotId = 0;
    long totalNumberOfAcks = 0;
    for (int i = 0; i < numSnapshots; i++) {
        long snapshotId = random.nextLong();
        OperatorStateHandles data;
        synchronized (DummySourceContext.lock) {
            data = testHarness.snapshot(snapshotId, System.currentTimeMillis());
            previousSnapshotId = lastSnapshotId;
            lastSnapshotId = messageId;
        }
        // let some time pass
        Thread.sleep(5);
        // check if the correct number of messages have been snapshotted
        final long numIds = lastSnapshotId - previousSnapshotId;
        RMQTestSource sourceCopy = new RMQTestSource();
        StreamSource<String, RMQTestSource> srcCopy = new StreamSource<>(sourceCopy);
        AbstractStreamOperatorTestHarness<String> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
        testHarnessCopy.setup();
        testHarnessCopy.initializeState(data);
        testHarnessCopy.open();
        ArrayDeque<Tuple2<Long, List<String>>> deque = sourceCopy.getRestoredState();
        List<String> messageIds = deque.getLast().f1;
        assertEquals(numIds, messageIds.size());
        if (!messageIds.isEmpty()) {
            assertEquals(lastSnapshotId, Long.parseLong(messageIds.get(messageIds.size() - 1)));
        }
        // check if the messages are being acknowledged and the transaction committed
        synchronized (DummySourceContext.lock) {
            source.notifyCheckpointComplete(snapshotId);
        }
        totalNumberOfAcks += numIds;
    }
    Mockito.verify(source.channel, Mockito.times((int) totalNumberOfAcks)).basicAck(Mockito.anyLong(), Mockito.eq(false));
    Mockito.verify(source.channel, Mockito.times(numSnapshots)).txCommit();
}
Also used : StreamSource (org.apache.flink.streaming.api.operators.StreamSource), AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness), OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), Random (java.util.Random), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Test (org.junit.Test)
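Every example in this set follows the same snapshot-and-restore round trip. Here is a minimal sketch of that idiom in isolation, assuming (as these excerpts suggest) that the harness constructor's trailing arguments are maxParallelism, parallelism, and subtask index; NoOpSource is a stand-in of our own, not part of Flink:

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.operators.StreamSource;
import org.apache.flink.streaming.runtime.tasks.OperatorStateHandles;
import org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness;

// A no-op source, just to make the round trip concrete; static, so it stays
// serializable and does not capture the enclosing test instance.
public static class NoOpSource implements SourceFunction<String> {
    @Override
    public void run(SourceContext<String> ctx) {
    }

    @Override
    public void cancel() {
    }
}

// inside a test method declaring throws Exception:
AbstractStreamOperatorTestHarness<String> original =
        new AbstractStreamOperatorTestHarness<>(new StreamSource<>(new NoOpSource()), 1, 1, 0);
original.open();

// snapshot(checkpointId, timestamp) captures the operator's current state ...
OperatorStateHandles handles = original.snapshot(1L, 0L);
original.close();

// ... and a fresh harness for the "restored" subtask replays it;
// initializeState() must run between setup() and open().
AbstractStreamOperatorTestHarness<String> restored =
        new AbstractStreamOperatorTestHarness<>(new StreamSource<>(new NoOpSource()), 1, 1, 0);
restored.setup();
restored.initializeState(handles);
restored.open();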

Example 23 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

the class ContinuousFileProcessingTest method testFunctionRestore.

@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    org.apache.hadoop.fs.Path path = null;
    long fileModTime = Long.MIN_VALUE;
    for (int i = 0; i < 1; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        path = file.f0;
        fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    final OneShotLatch latch = new OneShotLatch();
    final DummySourceContext sourceContext = new DummySourceContext() {

        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();
    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }
    // second condition for the source to have updated its state: it is no longer
    // holding the checkpoint lock, which means it has processed all the splits
    // and updated its state. Acquiring (and immediately releasing) the lock
    // therefore acts as a barrier before taking the snapshot.
    synchronized (sourceContext.getCheckpointLock()) {
    }
    OperatorStateHandles snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    runner.join();
    testHarness.close();
    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy = new StreamSource<>(monitoringFunctionCopy);
    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();
    Assert.assertNull(error[0]);
    Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());
    hdfs.delete(path, false);
}
Also used : Path (org.apache.flink.core.fs.Path), TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit), StreamSource (org.apache.flink.streaming.api.operators.StreamSource), ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction), AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness), OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), Test (org.junit.Test)
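The OneShotLatch handshake above is a reusable pattern worth isolating: the producing thread triggers the latch once, and the waiting thread blocks until that happens (or returns immediately if it already has). A minimal sketch, inside a test method declaring throws Exception:

import org.apache.flink.core.testutils.OneShotLatch;

final OneShotLatch latch = new OneShotLatch();

Thread producer = new Thread() {

    @Override
    public void run() {
        // ... do the work the test is waiting for (e.g. emit the first element) ...
        latch.trigger(); // wakes up every current and future await()
    }
};
producer.start();

// cheap fast-path check, as in the test above; await() alone would also work
if (!latch.isTriggered()) {
    latch.await();
}
producer.join();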

Example 24 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

the class RollingSinkITCase method testScalingDown.

@Test
public void testScalingDown() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 3, 0);
    testHarness1.setup();
    testHarness1.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 3, 1);
    testHarness2.setup();
    testHarness2.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness3 = createRescalingTestSink(outDir, 3, 2);
    testHarness3.setup();
    testHarness3.open();
    testHarness1.processElement(new StreamRecord<>("test1", 0L));
    checkFs(outDir, 1, 0, 0, 0);
    testHarness2.processElement(new StreamRecord<>("test2", 0L));
    testHarness2.processElement(new StreamRecord<>("test3", 0L));
    testHarness2.processElement(new StreamRecord<>("test4", 0L));
    testHarness2.processElement(new StreamRecord<>("test5", 0L));
    testHarness2.processElement(new StreamRecord<>("test6", 0L));
    checkFs(outDir, 2, 4, 0, 0);
    testHarness3.processElement(new StreamRecord<>("test7", 0L));
    testHarness3.processElement(new StreamRecord<>("test8", 0L));
    checkFs(outDir, 3, 5, 0, 0);
    // we intentionally snapshot them in non-ascending order so that the states are shuffled
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness3.snapshot(0, 0), testHarness1.snapshot(0, 0), testHarness2.snapshot(0, 0));
    // with the above state reshuffling, we expect testHarness4 to take the
    // state of the previous testHarness3 and testHarness1 while testHarness5
    // will take that of the previous testHarness2
    OneInputStreamOperatorTestHarness<String, Object> testHarness4 = createRescalingTestSink(outDir, 2, 0);
    testHarness4.setup();
    testHarness4.initializeState(mergedSnapshot);
    testHarness4.open();
    // we do not have a length file for part-2-0 because bucket part-2-0
    // was not "in-progress", but "pending" (its full content is valid).
    checkFs(outDir, 1, 4, 3, 2);
    OneInputStreamOperatorTestHarness<String, Object> testHarness5 = createRescalingTestSink(outDir, 2, 1);
    testHarness5.setup();
    testHarness5.initializeState(mergedSnapshot);
    testHarness5.open();
    checkFs(outDir, 0, 0, 8, 3);
}
Also used : OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
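To isolate the redistribution step the comment above describes: repackageState(...) bundles the per-subtask snapshots into a single OperatorStateHandles object, and every new harness is initialized from that same bundle; initializeState() then extracts the slice belonging to the harness's own subtask index. A sketch with renamed placeholder harnesses (oldSubtaskN stand for the three pre-rescale harnesses, newSubtask0 for one of the two post-rescale ones):

OperatorStateHandles merged = AbstractStreamOperatorTestHarness.repackageState(
        oldSubtask2.snapshot(0, 0),   // deliberately out of order, so the
        oldSubtask0.snapshot(0, 0),   // slices are shuffled before being
        oldSubtask1.snapshot(0, 0));  // redistributed

OneInputStreamOperatorTestHarness<String, Object> newSubtask0 = createRescalingTestSink(outDir, 2, 0);
newSubtask0.setup();
newSubtask0.initializeState(merged); // picks up its share of the merged state
newSubtask0.open();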

Example 25 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

the class RollingSinkITCase method testBucketStateTransitions.

@Test
public void testBucketStateTransitions() throws Exception {
    final File outDir = tempFolder.newFolder();
    OneInputStreamOperatorTestHarness<String, Object> testHarness = createRescalingTestSink(outDir, 1, 0);
    testHarness.setup();
    testHarness.open();
    testHarness.setProcessingTime(0L);
    // we have a bucket size of 5 bytes, so each record will get its own bucket,
    // i.e. the bucket should roll after every record.
    testHarness.processElement(new StreamRecord<>("test1", 1L));
    testHarness.processElement(new StreamRecord<>("test2", 1L));
    checkFs(outDir, 1, 1, 0, 0);
    testHarness.processElement(new StreamRecord<>("test3", 1L));
    checkFs(outDir, 1, 2, 0, 0);
    testHarness.snapshot(0, 0);
    checkFs(outDir, 1, 2, 0, 0);
    testHarness.notifyOfCompletedCheckpoint(0);
    checkFs(outDir, 1, 0, 2, 0);
    OperatorStateHandles snapshot = testHarness.snapshot(1, 0);
    testHarness.close();
    checkFs(outDir, 0, 1, 2, 0);
    testHarness = createRescalingTestSink(outDir, 1, 0);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    checkFs(outDir, 0, 0, 3, 1);
    snapshot = testHarness.snapshot(2, 0);
    testHarness.processElement(new StreamRecord<>("test4", 10));
    checkFs(outDir, 1, 0, 3, 1);
    testHarness = createRescalingTestSink(outDir, 1, 0);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    // the in-progress file remains as we do not clean up now
    checkFs(outDir, 1, 0, 3, 1);
    testHarness.close();
    // at close it is not moved to final because it is not part
    // of the current task's state, it was just a not cleaned up leftover.
    checkFs(outDir, 1, 0, 3, 1);
}
Also used : OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), Test (org.junit.Test)
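Condensed, the lifecycle these assertions trace is: a record opens an in-progress part file; exceeding the 5-byte batch size rolls that file to pending; a snapshot records which pending files belong to a checkpoint; notifyOfCompletedCheckpoint() promotes them to their final names; and close() merely rolls the still-open file to pending. A sketch of that core sequence, reusing the createRescalingTestSink helper from the test above (a reading aid, not a second test):

OneInputStreamOperatorTestHarness<String, Object> harness = createRescalingTestSink(outDir, 1, 0);
harness.setup();
harness.open();

harness.processElement(new StreamRecord<>("test1", 0L)); // opens an in-progress part file
harness.processElement(new StreamRecord<>("test2", 0L)); // batch size reached: the first file rolls to pending

harness.snapshot(0, 0);                  // records the pending file under checkpoint 0
harness.notifyOfCompletedCheckpoint(0);  // checkpoint 0 complete: pending file moves to its final name

harness.close();                         // the still-open in-progress file becomes pending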

Aggregations

OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) 51
Test (org.junit.Test) 45
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 17
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 16
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) 14
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 12
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 9
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow) 8
File (java.io.File) 7
Tuple3 (org.apache.flink.api.java.tuple.Tuple3) 7
AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) 7
SequenceFile (org.apache.hadoop.io.SequenceFile) 7
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 6
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) 6
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest) 6
ArrayList (java.util.ArrayList) 5
Map (java.util.Map) 5
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor) 5
OneShotLatch (org.apache.flink.core.testutils.OneShotLatch) 5
Event (org.apache.flink.cep.Event) 4