Search in sources :

Example 26 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

From the class FromElementsFunctionTest, method testCheckpointAndRestore:

/**
 * Checks that a {@link FromElementsFunction} restored from a snapshot neither loses nor
 * duplicates elements.
 *
 * <p>The source runs on a background thread. After a short wait, its state is snapshotted
 * through the test harness while holding the checkpoint lock, and the elements emitted so far
 * are copied under that same lock. A second function instance is then initialized from the
 * snapshot and run to completion, appending to the copied list, which must end up equal to the
 * original input.
 *
 * @throws Exception if the harness, the snapshot or the recovery run fails; the exception is
 *     propagated unchanged so that the test runner reports its full stack trace
 */
@Test
public void testCheckpointAndRestore() throws Exception {
    final int numElements = 10000;
    List<Integer> data = new ArrayList<Integer>(numElements);
    List<Integer> result = new ArrayList<Integer>(numElements);
    for (int i = 0; i < numElements; i++) {
        data.add(i);
    }
    final FromElementsFunction<Integer> source = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> src = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<Integer> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    // NOTE(review): the second argument (2L) presumably throttles emission so that the source is
    // still running when the snapshot is taken -- confirm against ListSourceContext.
    final SourceFunction.SourceContext<Integer> ctx = new ListSourceContext<Integer>(result, 2L);
    final Throwable[] error = new Throwable[1];
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                source.run(ctx);
            } catch (Throwable t) {
                error[0] = t;
            }
        }
    };
    runner.start();

    List<Integer> checkpointData = new ArrayList<>(numElements);
    OperatorSubtaskState handles;
    try {
        // wait for a bit
        Thread.sleep(1000);
        // make a checkpoint; snapshot and copy happen under the checkpoint lock so that both
        // reflect the same point in the element stream
        synchronized (ctx.getCheckpointLock()) {
            handles = testHarness.snapshot(566, System.currentTimeMillis());
            checkpointData.addAll(result);
        }
    } finally {
        // always stop the background source, even when the snapshot failed, so that the runner
        // thread does not outlive the test
        source.cancel();
        runner.join();
    }

    // check for errors; join() above makes the write to error[0] visible here
    if (error[0] != null) {
        throw new AssertionError("Error in asynchronous source runner", error[0]);
    }

    final FromElementsFunction<Integer> sourceCopy = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
    StreamSource<Integer, FromElementsFunction<Integer>> srcCopy = new StreamSource<>(sourceCopy);
    AbstractStreamOperatorTestHarness<Integer> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.setup();
    testHarnessCopy.initializeState(handles);
    testHarnessCopy.open();
    // recovery run: continues appending to the elements seen before the checkpoint
    SourceFunction.SourceContext<Integer> newCtx = new ListSourceContext<>(checkpointData);
    sourceCopy.run(newCtx);
    assertEquals(data, checkpointData);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) ExpectedException(org.junit.rules.ExpectedException) IOException(java.io.IOException) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) FromElementsFunction(org.apache.flink.streaming.api.functions.source.FromElementsFunction) Test(org.junit.Test)

Example 27 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

From the class DataGeneratorSourceTest, method innerTestDataGenCheckpointRestore:

/**
 * Runs two source subtasks until each signals its trigger latch, snapshots both, restores the
 * combined state into a single third subtask, and verifies the elements collected by all three
 * runs.
 *
 * <p>The verification asserts that output was recorded under exactly three keys, that every
 * task produced at least one element, that no element was emitted twice, that no element lies
 * outside {@code expectedOutput}, and that the number of distinct elements equals
 * {@code expectedOutput.size()}.
 *
 * <p>The first two runner threads are released and joined in a {@code finally} block, so a
 * failing assertion does not leave them blocked on their wait latches.
 *
 * @param supplier creates a fresh source instance; it is invoked once per subtask (three times)
 * @param expectedOutput the full set of elements that the three runs together must emit
 * @param <T> the element type produced by the source
 * @throws Exception if a harness operation fails or the calling thread is interrupted
 */
public static <T> void innerTestDataGenCheckpointRestore(Supplier<DataGeneratorSource<T>> supplier, Set<T> expectedOutput) throws Exception {
    final int maxParallelism = 2;
    final ConcurrentHashMap<String, List<T>> outputCollector = new ConcurrentHashMap<>();
    final OneShotLatch latchToTrigger1 = new OneShotLatch();
    final OneShotLatch latchToWait1 = new OneShotLatch();
    final OneShotLatch latchToTrigger2 = new OneShotLatch();
    final OneShotLatch latchToWait2 = new OneShotLatch();
    final DataGeneratorSource<T> source1 = supplier.get();
    StreamSource<T, DataGeneratorSource<T>> src1 = new StreamSource<>(source1);
    final AbstractStreamOperatorTestHarness<T> testHarness1 = new AbstractStreamOperatorTestHarness<>(src1, maxParallelism, 2, 0);
    testHarness1.open();
    final DataGeneratorSource<T> source2 = supplier.get();
    StreamSource<T, DataGeneratorSource<T>> src2 = new StreamSource<>(source2);
    final AbstractStreamOperatorTestHarness<T> testHarness2 = new AbstractStreamOperatorTestHarness<>(src2, maxParallelism, 2, 1);
    testHarness2.open();
    // run the source asynchronously
    // NOTE(review): the trailing numeric argument of BlockingSourceContext (21, 32, 3) is
    // presumably the element count after which the context blocks -- confirm.
    Thread runner1 = new Thread(() -> {
        try {
            source1.run(new BlockingSourceContext<>("1", latchToTrigger1, latchToWait1, outputCollector, 21));
        } catch (Throwable t) {
            t.printStackTrace();
        }
    });
    // run the source asynchronously
    Thread runner2 = new Thread(() -> {
        try {
            source2.run(new BlockingSourceContext<>("2", latchToTrigger2, latchToWait2, outputCollector, 32));
        } catch (Throwable t) {
            t.printStackTrace();
        }
    });
    runner1.start();
    runner2.start();
    try {
        if (!latchToTrigger1.isTriggered()) {
            latchToTrigger1.await();
        }
        if (!latchToTrigger2.isTriggered()) {
            latchToTrigger2.await();
        }
        OperatorSubtaskState snapshot = AbstractStreamOperatorTestHarness.repackageState(testHarness1.snapshot(0L, 0L), testHarness2.snapshot(0L, 0L));
        final DataGeneratorSource<T> source3 = supplier.get();
        StreamSource<T, DataGeneratorSource<T>> src3 = new StreamSource<>(source3);
        final OperatorSubtaskState initState = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 2, 1, 0);
        final AbstractStreamOperatorTestHarness<T> testHarness3 = new AbstractStreamOperatorTestHarness<>(src3, maxParallelism, 1, 0);
        testHarness3.setup();
        testHarness3.initializeState(initState);
        testHarness3.open();
        final OneShotLatch latchToTrigger3 = new OneShotLatch();
        final OneShotLatch latchToWait3 = new OneShotLatch();
        // the restored source must never block, so its wait latch is released up front
        latchToWait3.trigger();
        // run the source asynchronously
        Thread runner3 = new Thread(() -> {
            try {
                source3.run(new BlockingSourceContext<>("3", latchToTrigger3, latchToWait3, outputCollector, 3));
            } catch (Throwable t) {
                t.printStackTrace();
            }
        });
        runner3.start();
        runner3.join();
        // we have 3 tasks.
        Assert.assertEquals(3, outputCollector.size());
        // test for at-most-once
        Set<T> dedupRes = new HashSet<>(expectedOutput.size());
        for (Map.Entry<String, List<T>> elementsPerTask : outputCollector.entrySet()) {
            List<T> elements = elementsPerTask.getValue();
            // this tests the correctness of the latches in the test
            Assert.assertTrue(elements.size() > 0);
            for (T elem : elements) {
                if (!dedupRes.add(elem)) {
                    Assert.fail("Duplicate entry: " + elem);
                }
                if (!expectedOutput.contains(elem)) {
                    Assert.fail("Unexpected element: " + elem);
                }
            }
        }
        // test for exactly-once
        Assert.assertEquals(expectedOutput.size(), dedupRes.size());
    } finally {
        // release the first two sources and wait for everybody to finish, also on failure
        latchToWait1.trigger();
        latchToWait2.trigger();
        runner1.join();
        runner2.join();
    }
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ArrayList(java.util.ArrayList) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 28 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

From the class DataGenTableSourceFactoryTest, method runGenerator:

/**
 * Builds the table source for the given schema and options, runs its generator source once,
 * and returns the rows that were collected.
 *
 * <p>The generator is cloned via {@code InstantiationUtil.clone} before use so that the test
 * also exercises Java serialization of the source.
 *
 * @param schema the schema passed to {@code createTableSource}
 * @param descriptor the options; passed to {@code createTableSource} as a map
 * @return the {@code results} field of the {@code TestContext} the generator ran against
 * @throws Exception if cloning, opening the harness, or running the generator fails
 */
private List<RowData> runGenerator(ResolvedSchema schema, DescriptorProperties descriptor) throws Exception {
    final DynamicTableSource tableSource = createTableSource(schema, descriptor.asMap());
    assertTrue(tableSource instanceof DataGenTableSource);

    // Round-trip the freshly created generator through Java serialization before using it.
    final DataGeneratorSource<RowData> original = ((DataGenTableSource) tableSource).createSource();
    final DataGeneratorSource<RowData> generator = InstantiationUtil.clone(original);

    // The generator is wrapped in an operator and the harness is opened before it runs.
    final StreamSource<RowData, DataGeneratorSource<RowData>> operator = new StreamSource<>(generator);
    final AbstractStreamOperatorTestHarness<RowData> harness = new AbstractStreamOperatorTestHarness<>(operator, 1, 1, 0);
    harness.open();

    final TestContext collector = new TestContext();
    generator.run(collector);
    return collector.results;
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) DataGeneratorSource(org.apache.flink.streaming.api.functions.source.datagen.DataGeneratorSource) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) DataGenTableSource(org.apache.flink.connector.datagen.table.DataGenTableSource) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)

Aggregations

AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)28 Test (org.junit.Test)23 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)21 ArrayList (java.util.ArrayList)17 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)11 HashMap (java.util.HashMap)6 List (java.util.List)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)6 SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)5 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)4 Shard (com.amazonaws.services.kinesis.model.Shard)4 TextInputFormat (org.apache.flink.api.java.io.TextInputFormat)4 Path (org.apache.flink.core.fs.Path)4 ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction)4 TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit)4 KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)4 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)4 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)4 TestRuntimeContext (org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext)4 HashSet (java.util.HashSet)3