Search in sources :

Example 31 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

Source: class BucketingSinkTest, method testSameParallelismWithShufflingStates.

// Restores two snapshots, taken from two subtasks of the same bucketing sink,
// repackaged in reverse ("shuffled") order into two new subtasks at unchanged
// parallelism, and checks the files in the output directory after each step.
@Test
public void testSameParallelismWithShufflingStates() throws Exception {
    final File outDir = tempFolder.newFolder();
    // Two sink subtasks (parallelism 2, indices 0 and 1) writing into the same directory.
    OneInputStreamOperatorTestHarness<String, Object> testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.open();
    OneInputStreamOperatorTestHarness<String, Object> testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
    testHarness2.setup();
    testHarness2.open();
    // Each element opens an in-progress file for its subtask.
    // NOTE(review): checkFs's four counters look like per-state file counts
    // (in-progress / pending / etc.); the exact semantics are defined by the
    // enclosing test class and are not visible here — confirm against checkFs.
    testHarness1.processElement(new StreamRecord<>("test1", 0L));
    checkFs(outDir, 1, 0, 0, 0);
    testHarness2.processElement(new StreamRecord<>("test2", 0L));
    checkFs(outDir, 2, 0, 0, 0);
    // intentionally we snapshot them in the reverse order so that the states are shuffled
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    checkFs(outDir, 2, 0, 0, 0);
    // this will not be included in any checkpoint so it can be cleaned up (although we do not)
    testHarness2.processElement(new StreamRecord<>("test3", 0L));
    checkFs(outDir, 3, 0, 0, 0);
    // Re-create subtask 0 and restore it from the merged (shuffled) snapshot.
    testHarness1 = createRescalingTestSink(outDir, 2, 0, 100);
    testHarness1.setup();
    testHarness1.initializeState(mergedSnapshot);
    testHarness1.open();
    // the one in-progress will be the one assigned to the next instance,
    // the other is the test3 which is just not cleaned up
    checkFs(outDir, 2, 0, 1, 1);
    // Re-create subtask 1 and restore it from the same merged snapshot.
    testHarness2 = createRescalingTestSink(outDir, 2, 1, 100);
    testHarness2.setup();
    testHarness2.initializeState(mergedSnapshot);
    testHarness2.open();
    checkFs(outDir, 1, 0, 2, 2);
    testHarness1.close();
    testHarness2.close();
    // the 1 in-progress can be discarded.
    checkFs(outDir, 1, 0, 2, 2);
}
Also used : OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) Test(org.junit.Test)

Example 32 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

Source: class ContinuousFileProcessingTest, method testReaderSnapshotRestore.

// Snapshots a ContinuousFileReaderOperator while its input format is blocked
// on a shared latch, restores a second reader from that snapshot, then
// releases the latch and verifies both readers emit the same splits.
@Test
public void testReaderSnapshotRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    // Four splits with varying modification times / split numbers to give the
    // reader non-trivial pending-split state to snapshot.
    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
    // The format blocks on this latch, so no split is fully read before the snapshot.
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(testBasePath));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance = new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.open();
    // create some state in the reader
    initTestInstance.processElement(new StreamRecord<>(split1));
    initTestInstance.processElement(new StreamRecord<>(split2));
    initTestInstance.processElement(new StreamRecord<>(split3));
    initTestInstance.processElement(new StreamRecord<>(split4));
    // take a snapshot of the operator's state. This will be used
    // to initialize another reader and compare the results of the
    // two operators.
    final OperatorStateHandles snapshot;
    synchronized (initTestInstance.getCheckpointLock()) {
        snapshot = initTestInstance.snapshot(0L, 0L);
    }
    // Second reader, restored from the snapshot before being opened.
    ContinuousFileReaderOperator<FileInputSplit> restoredReader = new ContinuousFileReaderOperator<>(new BlockingFileInputFormat(latch, new Path(testBasePath)));
    restoredReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> restoredTestInstance = new OneInputStreamOperatorTestHarness<>(restoredReader);
    restoredTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    restoredTestInstance.initializeState(snapshot);
    restoredTestInstance.open();
    // now let computation start
    latch.trigger();
    // close() under the checkpoint lock drains the readers' remaining work.
    synchronized (initTestInstance.getCheckpointLock()) {
        initTestInstance.close();
    }
    synchronized (restoredTestInstance.getCheckpointLock()) {
        restoredTestInstance.close();
    }
    FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);
    // compare if the results contain what they should contain and also if
    // they are the same, as they should.
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
    Assert.assertArrayEquals(initTestInstance.getOutput().toArray(), restoredTestInstance.getOutput().toArray());
}
Also used : Path(org.apache.flink.core.fs.Path) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) Test(org.junit.Test)

Example 33 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project beam by apache.

Source: class DedupingOperatorTest, method testDeduping.

// Verifies that the deduping operator drops elements whose record id was
// already seen, and that the seen-id state survives a snapshot/restore cycle.
@Test
public void testDeduping() throws Exception {
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<ValueWithRecordId<String>>, WindowedValue<String>> harness = getDebupingHarness();
    harness.open();
    String key1 = "key1";
    String key2 = "key2";
    // key1 arrives twice; only its first occurrence may be emitted.
    harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(key1, key1.getBytes()))));
    harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(key2, key2.getBytes()))));
    harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(key1, key1.getBytes()))));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()), contains(WindowedValue.valueInGlobalWindow(key1), WindowedValue.valueInGlobalWindow(key2)));
    // Snapshot the seen-id state and restore it into a fresh operator instance.
    OperatorStateHandles snapshot = harness.snapshot(0L, 0L);
    harness.close();
    harness = getDebupingHarness();
    harness.setup();
    harness.initializeState(snapshot);
    harness.open();
    String key3 = "key3";
    // key2 was seen before the restore, so only the new key3 is emitted.
    harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(key2, key2.getBytes()))));
    harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(new ValueWithRecordId<>(key3, key3.getBytes()))));
    assertThat(this.<String>stripStreamRecordFromWindowedValue(harness.getOutput()), contains(WindowedValue.valueInGlobalWindow(key3)));
    harness.close();
}
Also used : OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 34 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

Source: class FromElementsFunctionTest, method testCheckpointAndRestore.

/**
 * Checks that a {@code FromElementsFunction} can be checkpointed while it is
 * still emitting, and that a copy restored from that checkpoint produces
 * exactly the remaining elements — so the output captured at the checkpoint
 * plus the recovery run equals the full input list.
 *
 * <p>The original wrapped the whole body in {@code try { … } catch (Exception e)
 * { fail(e.getMessage()); }}, which hides the stack trace from the JUnit
 * report (and fails with a null message for message-less exceptions); the
 * test now declares {@code throws Exception} and lets failures propagate.
 */
@Test
public void testCheckpointAndRestore() throws Exception {
    final int NUM_ELEMENTS = 10000;
    List<Integer> data = new ArrayList<>(NUM_ELEMENTS);
    // Elements emitted so far; copied under the checkpoint lock below.
    List<Integer> result = new ArrayList<>(NUM_ELEMENTS);
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        data.add(i);
    }
    final FromElementsFunction<Integer> source = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> src = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<Integer> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    // Context that throttles emission (2 ms per element) so the source is
    // still mid-run when the checkpoint is taken.
    final SourceFunction.SourceContext<Integer> ctx = new ListSourceContext<Integer>(result, 2L);
    // Captures a failure from the source thread for re-reporting on the test thread.
    final Throwable[] error = new Throwable[1];
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                source.run(ctx);
            } catch (Throwable t) {
                error[0] = t;
            }
        }
    };
    runner.start();
    // give the source time to emit some, but not all, elements
    Thread.sleep(1000);
    // make a checkpoint: snapshot the operator and copy the emitted-so-far
    // list atomically under the checkpoint lock.
    List<Integer> checkpointData = new ArrayList<>(NUM_ELEMENTS);
    final OperatorStateHandles handles;
    synchronized (ctx.getCheckpointLock()) {
        handles = testHarness.snapshot(566, System.currentTimeMillis());
        checkpointData.addAll(result);
    }
    // cancel the source
    source.cancel();
    runner.join();
    // surface any failure from the asynchronous runner
    if (error[0] != null) {
        System.err.println("Error in asynchronous source runner");
        error[0].printStackTrace();
        fail("Error in asynchronous source runner");
    }
    // Restore a copy of the source from the checkpoint and run it to completion.
    final FromElementsFunction<Integer> sourceCopy = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> srcCopy = new StreamSource<>(sourceCopy);
    AbstractStreamOperatorTestHarness<Integer> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.setup();
    testHarnessCopy.initializeState(handles);
    testHarnessCopy.open();
    // recovery run appends the remaining elements to checkpointData
    SourceFunction.SourceContext<Integer> newCtx = new ListSourceContext<>(checkpointData);
    sourceCopy.run(newCtx);
    assertEquals(data, checkpointData);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) IOException(java.io.IOException) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) FromElementsFunction(org.apache.flink.streaming.api.functions.source.FromElementsFunction) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) Test(org.junit.Test)

Example 35 with OperatorStateHandles

use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.

Source: class StatefulSequenceSourceTest, method testCheckpointRestore.

// Runs two StatefulSequenceSource subtasks until each is blocked on a latch,
// merges their snapshots into one state, restores it into a single subtask
// (parallelism 2 -> 1), and verifies the combined output of all three tasks is
// exactly the sequence [initElement, maxElement] with no duplicates.
@Test
public void testCheckpointRestore() throws Exception {
    final int initElement = 0;
    final int maxElement = 100;
    final Set<Long> expectedOutput = new HashSet<>();
    for (long i = initElement; i <= maxElement; i++) {
        expectedOutput.add(i);
    }
    // Output per task id ("1", "2", "3"); concurrent because the three source
    // threads write into it.
    final ConcurrentHashMap<String, List<Long>> outputCollector = new ConcurrentHashMap<>();
    // trigger-latches fire after a task emits its quota; wait-latches hold the
    // task until the test releases it.
    final OneShotLatch latchToTrigger1 = new OneShotLatch();
    final OneShotLatch latchToWait1 = new OneShotLatch();
    final OneShotLatch latchToTrigger2 = new OneShotLatch();
    final OneShotLatch latchToWait2 = new OneShotLatch();
    // Two source subtasks: parallelism 2, indices 0 and 1.
    final StatefulSequenceSource source1 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src1 = new StreamSource<>(source1);
    final AbstractStreamOperatorTestHarness<Long> testHarness1 = new AbstractStreamOperatorTestHarness<>(src1, 2, 2, 0);
    testHarness1.open();
    final StatefulSequenceSource source2 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src2 = new StreamSource<>(source2);
    final AbstractStreamOperatorTestHarness<Long> testHarness2 = new AbstractStreamOperatorTestHarness<>(src2, 2, 2, 1);
    testHarness2.open();
    // One error slot per runner thread.
    final Throwable[] error = new Throwable[3];
    // run the source asynchronously
    Thread runner1 = new Thread() {

        @Override
        public void run() {
            try {
                source1.run(new BlockingSourceContext("1", latchToTrigger1, latchToWait1, outputCollector, 21));
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    // run the source asynchronously
    Thread runner2 = new Thread() {

        @Override
        public void run() {
            try {
                source2.run(new BlockingSourceContext("2", latchToTrigger2, latchToWait2, outputCollector, 32));
            } catch (Throwable t) {
                t.printStackTrace();
                error[1] = t;
            }
        }
    };
    runner1.start();
    runner2.start();
    // Wait until both tasks have emitted their quota and are blocked, so the
    // snapshot below is taken at a well-defined point.
    if (!latchToTrigger1.isTriggered()) {
        latchToTrigger1.await();
    }
    if (!latchToTrigger2.isTriggered()) {
        latchToTrigger2.await();
    }
    // Merge both subtask snapshots into one repackaged state.
    OperatorStateHandles snapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness1.snapshot(0L, 0L), testHarness2.snapshot(0L, 0L));
    // Third task: restored at reduced parallelism (2 -> 1), so it receives the
    // merged state of both original subtasks.
    final StatefulSequenceSource source3 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src3 = new StreamSource<>(source3);
    final AbstractStreamOperatorTestHarness<Long> testHarness3 = new AbstractStreamOperatorTestHarness<>(src3, 2, 1, 0);
    testHarness3.setup();
    testHarness3.initializeState(snapshot);
    testHarness3.open();
    final OneShotLatch latchToTrigger3 = new OneShotLatch();
    final OneShotLatch latchToWait3 = new OneShotLatch();
    // Pre-trigger the wait latch so task 3 runs to completion without pausing.
    latchToWait3.trigger();
    // run the source asynchronously
    Thread runner3 = new Thread() {

        @Override
        public void run() {
            try {
                source3.run(new BlockingSourceContext("3", latchToTrigger3, latchToWait3, outputCollector, 3));
            } catch (Throwable t) {
                t.printStackTrace();
                error[2] = t;
            }
        }
    };
    runner3.start();
    runner3.join();
    // we have 3 tasks.
    Assert.assertEquals(3, outputCollector.size());
    // test for at-most-once
    Set<Long> dedupRes = new HashSet<>(Math.abs(maxElement - initElement) + 1);
    for (Map.Entry<String, List<Long>> elementsPerTask : outputCollector.entrySet()) {
        String key = elementsPerTask.getKey();
        List<Long> elements = outputCollector.get(key);
        // this tests the correctness of the latches in the test
        Assert.assertTrue(elements.size() > 0);
        for (Long elem : elements) {
            if (!dedupRes.add(elem)) {
                Assert.fail("Duplicate entry: " + elem);
            }
            if (!expectedOutput.contains(elem)) {
                Assert.fail("Unexpected element: " + elem);
            }
        }
    }
    // test for exactly-once
    Assert.assertEquals(Math.abs(initElement - maxElement) + 1, dedupRes.size());
    // Release tasks 1 and 2, which were still blocked on their wait latches.
    latchToWait1.trigger();
    latchToWait2.trigger();
    // wait for everybody to finish.
    runner1.join();
    runner2.join();
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ArrayList(java.util.ArrayList) List(java.util.List) StatefulSequenceSource(org.apache.flink.streaming.api.functions.source.StatefulSequenceSource) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles)51 Test (org.junit.Test)45 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)17 Watermark (org.apache.flink.streaming.api.watermark.Watermark)16 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)14 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)12 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)9 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)8 File (java.io.File)7 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)7 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)7 SequenceFile (org.apache.hadoop.io.SequenceFile)7 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)6 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)6 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)5 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)5 Event (org.apache.flink.cep.Event)4