Search in sources:

Example 16 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class ContinuousFileProcessingTest method testFunctionRestore.

/**
 * Snapshots a running continuous file monitoring source after it has emitted at least one
 * split, restores a fresh copy of the function from that snapshot, and checks that the
 * restored function reports the modification time of the file that was created.
 */
@Test
public void testFunctionRestore() throws Exception {
    final String baseDir = hdfsURI + "/" + UUID.randomUUID() + "/";

    // Create the single input file and remember where it lives and when it was modified.
    final Tuple2<org.apache.hadoop.fs.Path, String> created = createFileAndFillWithData(baseDir, "file", 0, "This is test line.");
    final org.apache.hadoop.fs.Path createdPath = created.f0;
    final long expectedModTime = hdfs.getFileStatus(createdPath).getModificationTime();

    final TextInputFormat inputFormat = new TextInputFormat(new Path(baseDir));
    final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(inputFormat, FileProcessingMode.PROCESS_CONTINUOUSLY);
    final StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> sourceOperator = new StreamSource<>(monitoringFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(sourceOperator, 1, 1, 0);
    testHarness.open();

    // Single-slot holder so the background thread can report a failure to the test thread.
    final Throwable[] failure = new Throwable[1];
    final OneShotLatch firstSplitEmitted = new OneShotLatch();
    final DummySourceContext sourceContext = new DummySourceContext() {

        @Override
        public void collect(TimestampedFileInputSplit element) {
            firstSplitEmitted.trigger();
        }
    };

    // Run the source asynchronously; run(...) only returns after cancel() or on failure.
    final Thread sourceThread = new Thread(() -> {
        try {
            monitoringFunction.run(sourceContext);
        } catch (Throwable t) {
            t.printStackTrace();
            failure[0] = t;
        }
    });
    sourceThread.start();

    // First condition for the source to have updated its state: emit at least one element.
    if (!firstSplitEmitted.isTriggered()) {
        firstSplitEmitted.await();
    }

    // Second condition: the checkpoint lock can be acquired (and is released again right away).
    // NOTE(review): presumably the source holds this lock while it forwards splits and updates
    // its state, so getting the lock means that work is finished - confirm against
    // ContinuousFileMonitoringFunction.
    synchronized (sourceContext.getCheckpointLock()) {
    }

    final OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    sourceThread.join();
    testHarness.close();

    // Restore a brand-new function instance from the snapshot taken above.
    final ContinuousFileMonitoringFunction<String> restoredFunction = createTestContinuousFileMonitoringFunction(inputFormat, FileProcessingMode.PROCESS_CONTINUOUSLY);
    final StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> restoredOperator = new StreamSource<>(restoredFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> restoredHarness = new AbstractStreamOperatorTestHarness<>(restoredOperator, 1, 1, 0);
    restoredHarness.initializeState(snapshot);
    restoredHarness.open();

    Assert.assertNull(failure[0]);
    Assert.assertEquals(expectedModTime, restoredFunction.getGlobalModificationTime());

    hdfs.delete(createdPath, false);
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ContinuousFileMonitoringFunction(org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Example 17 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class ContinuousFileProcessingMigrationTest method writeMonitoringSourceSnapshot.

/**
 * Manually run this to write binary snapshot data. Remove @Ignore to run.
 *
 * <p>Starts a continuous file monitoring source over a temporary folder containing one file,
 * waits for the first split to be emitted, snapshots the operator under the checkpoint lock and
 * writes the snapshot below {@code src/test/resources}.
 */
@Ignore
@Test
public void writeMonitoringSourceSnapshot() throws Exception {
    final File inputDir = tempFolder.newFolder();

    // The modification time of the (last) created file becomes part of the snapshot file name.
    long lastModified = Long.MIN_VALUE;
    for (int fileIdx = 0; fileIdx < 1; fileIdx++) {
        final Tuple2<File, String> created = createFileAndFillWithData(inputDir, "file", fileIdx, "This is test line.");
        lastModified = created.f0.lastModified();
    }

    final TextInputFormat inputFormat = new TextInputFormat(new Path(inputDir.getAbsolutePath()));
    final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    final StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> sourceOperator = new StreamSource<>(monitoringFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(sourceOperator, 1, 1, 0);
    testHarness.open();

    // Records a failure of the background thread; it is stored but not inspected by this method.
    final Throwable[] failure = new Throwable[1];
    final OneShotLatch firstSplitEmitted = new OneShotLatch();

    // Run the source asynchronously; every emitted split just triggers the latch.
    final Thread sourceThread = new Thread(() -> {
        try {
            monitoringFunction.run(new DummySourceContext() {

                @Override
                public void collect(TimestampedFileInputSplit element) {
                    firstSplitEmitted.trigger();
                }

                @Override
                public void markAsTemporarilyIdle() {
                }
            });
        } catch (Throwable t) {
            t.printStackTrace();
            failure[0] = t;
        }
    });
    sourceThread.start();

    // Block until the source has emitted at least one split.
    if (!firstSplitEmitted.isTriggered()) {
        firstSplitEmitted.await();
    }

    // Take the snapshot while holding the harness' checkpoint lock.
    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }

    final String snapshotPath = "src/test/resources/monitoring-function-migration-test-" + lastModified + "-flink" + flinkGenerateSavepointVersion + "-snapshot";
    OperatorSnapshotUtil.writeStateHandle(snapshot, snapshotPath);

    monitoringFunction.cancel();
    sourceThread.join();
    testHarness.close();
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ContinuousFileMonitoringFunction(org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) File(java.io.File) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 18 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class ArrowSourceFunctionTestBase method testRestore.

/**
 * Runs an arrow source until it is made to fail after two emitted elements, snapshots it,
 * restores a second source instance from that snapshot and checks that both runs together emit
 * exactly the test data.
 */
@Test
public void testRestore() throws Exception {
    Tuple2<List<RowData>, Integer> testData = getTestData();
    final ArrowSourceFunction arrowSourceFunction = createTestArrowSourceFunction(testData.f0, testData.f1);
    final AbstractStreamOperatorTestHarness<RowData> testHarness = new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction), 1, 1, 0);
    testHarness.open();
    // Single-slot holder so the background threads can report unexpected failures.
    final Throwable[] error = new Throwable[1];
    final MultiShotLatch latch = new MultiShotLatch();
    final AtomicInteger numOfEmittedElements = new AtomicInteger(0);
    final List<RowData> results = new ArrayList<>();
    final DummySourceContext<RowData> sourceContext = new DummySourceContext<RowData>() {

        @Override
        public void collect(RowData element) {
            if (numOfEmittedElements.get() == 2) {
                latch.trigger();
                // fail the source function once two elements have been emitted,
                // i.e. when the third element arrives
                throw new RuntimeException("Fail the arrow source");
            }
            results.add(typeSerializer.copy(element));
            numOfEmittedElements.incrementAndGet();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread(() -> {
        try {
            arrowSourceFunction.run(sourceContext);
        } catch (Throwable t) {
            // Constant-first comparison: getMessage() may be null, and an NPE thrown here
            // would hide the real failure instead of recording it.
            if (!"Fail the arrow source".equals(t.getMessage())) {
                error[0] = t;
            }
        }
    });
    runner.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    OperatorSubtaskState snapshot;
    synchronized (sourceContext.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0, 0);
    }
    runner.join();
    testHarness.close();
    // Restore a second source instance from the snapshot and let it emit the remaining elements.
    final ArrowSourceFunction arrowSourceFunction2 = createTestArrowSourceFunction(testData.f0, testData.f1);
    final AbstractStreamOperatorTestHarness<RowData> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction2), 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();
    // run the source asynchronously
    Thread runner2 = new Thread(() -> {
        try {
            arrowSourceFunction2.run(new DummySourceContext<RowData>() {

                @Override
                public void collect(RowData element) {
                    results.add(typeSerializer.copy(element));
                    if (numOfEmittedElements.incrementAndGet() == testData.f0.size()) {
                        latch.trigger();
                    }
                }
            });
        } catch (Throwable t) {
            error[0] = t;
        }
    });
    runner2.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    // join() guarantees the restored source thread has ended before anything is asserted.
    runner2.join();
    // Close the restored harness as well, mirroring the handling of the first one.
    testHarnessCopy.close();
    Assert.assertNull(error[0]);
    Assert.assertEquals(testData.f0.size(), numOfEmittedElements.get());
    checkElementsEquals(results, testData.f0);
}
Also used : MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) ArrayList(java.util.ArrayList) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RowData(org.apache.flink.table.data.RowData) List(java.util.List) Test(org.junit.Test)

Example 19 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class CEPOperatorTest method testCEPOperatorComparatorProcessTime.

/**
 * Feeds events for two keys into a keyed CEP operator with a comparator under processing time,
 * snapshots and restores the operator in between, and verifies the four matches that are
 * emitted for key 42 in their expected order.
 */
@Test
public void testCEPOperatorComparatorProcessTime() throws Exception {
    final Event firstStart = new Event(42, "start", 1.0);
    final Event secondStart = new Event(42, "start", 2.0);
    final SubEvent firstMiddle = new SubEvent(42, "foo1", 3.0, 10.0);
    final SubEvent secondMiddle = new SubEvent(42, "foo2", 4.0, 10.0);
    final Event end = new Event(42, "end", 1.0);
    final Event otherKeyStart = new Event(43, "start", 1.0);

    final CepOperator<Event, Integer, Map<String, List<Event>>> cepOperator = getKeyedCepOperatorWithComparator(true);
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> testHarness = CepOperatorTestUtilities.getCepTestHarness(cepOperator);
    try {
        testHarness.open();

        // Phase 1: elements arriving at processing time 0.
        testHarness.setProcessingTime(0L);
        testHarness.processElement(new StreamRecord<>(firstStart, 0L));
        testHarness.processElement(new StreamRecord<>(otherKeyStart, 0L));
        testHarness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 0L));
        testHarness.processElement(new StreamRecord<>(new SubEvent(42, "barfoo", 1.0, 5.0), 0L));

        // Before processing time advances, neither key reports a non-empty shared buffer ...
        assertTrue(!cepOperator.hasNonEmptySharedBuffer(42));
        assertTrue(!cepOperator.hasNonEmptySharedBuffer(43));

        // ... and after advancing it, both keys do.
        testHarness.setProcessingTime(3L);
        assertTrue(cepOperator.hasNonEmptySharedBuffer(42));
        assertTrue(cepOperator.hasNonEmptySharedBuffer(43));

        // Phase 2: further elements at processing time 3, then snapshot the operator.
        testHarness.processElement(new StreamRecord<>(secondMiddle, 3L));
        testHarness.processElement(new StreamRecord<>(firstMiddle, 3L));
        testHarness.processElement(new StreamRecord<>(secondStart, 3L));
        final OperatorSubtaskState checkpoint = testHarness.snapshot(0L, 0L);
        testHarness.close();

        // Phase 3: restore a fresh operator from the snapshot and complete the pattern.
        final CepOperator<Event, Integer, Map<String, List<Event>>> restoredOperator = getKeyedCepOperatorWithComparator(true);
        testHarness = CepOperatorTestUtilities.getCepTestHarness(restoredOperator);
        testHarness.setup();
        testHarness.initializeState(checkpoint);
        testHarness.open();
        testHarness.setProcessingTime(4L);
        testHarness.processElement(new StreamRecord<>(end, 5L));
        testHarness.setProcessingTime(5L);

        // Matches are polled from the output in exactly this order.
        verifyPattern(testHarness.getOutput().poll(), firstStart, firstMiddle, end);
        verifyPattern(testHarness.getOutput().poll(), firstStart, secondMiddle, end);
        verifyPattern(testHarness.getOutput().poll(), secondStart, firstMiddle, end);
        verifyPattern(testHarness.getOutput().poll(), secondStart, secondMiddle, end);
    } finally {
        // Closes whichever harness is current (the original or the restored one).
        testHarness.close();
    }
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) Event(org.apache.flink.cep.Event) Map(java.util.Map) HashMap(java.util.HashMap) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)

Example 20 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class CEPOperatorTest method testKeyedCEPOperatorCheckpointing.

/**
 * Exercises snapshot/restore of the keyed CEP operator twice - once while elements have been
 * processed but no watermark has arrived, and once after a watermark has been processed - and
 * verifies that exactly one match plus the final watermark are emitted.
 */
@Test
public void testKeyedCEPOperatorCheckpointing() throws Exception {
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> testHarness = getCepTestHarness(false);
    try {
        testHarness.open();

        final Event start = new Event(42, "start", 1.0);
        final SubEvent middle = new SubEvent(42, "foo", 1.0, 10.0);
        final Event end = new Event(42, "end", 1.0);

        testHarness.processElement(new StreamRecord<>(start, 1L));
        testHarness.processElement(new StreamRecord<>(new Event(42, "foobar", 1.0), 2L));

        // simulate snapshot/restore with some elements in internal sorting queue
        final OperatorSubtaskState firstCheckpoint = testHarness.snapshot(0L, 0L);
        testHarness.close();

        testHarness = getCepTestHarness(false);
        testHarness.setup();
        testHarness.initializeState(firstCheckpoint);
        testHarness.open();

        testHarness.processWatermark(new Watermark(Long.MIN_VALUE));
        testHarness.processElement(new StreamRecord<Event>(new SubEvent(42, "barfoo", 1.0, 5.0), 3L));

        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        testHarness.processWatermark(new Watermark(2L));
        testHarness.processElement(new StreamRecord<Event>(middle, 3L));
        testHarness.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4L));
        testHarness.processElement(new StreamRecord<>(end, 5L));

        // simulate snapshot/restore with empty element queue but NFA state
        final OperatorSubtaskState secondCheckpoint = testHarness.snapshot(1L, 1L);
        testHarness.close();

        testHarness = getCepTestHarness(false);
        testHarness.setup();
        testHarness.initializeState(secondCheckpoint);
        testHarness.open();
        testHarness.processWatermark(new Watermark(Long.MAX_VALUE));

        // get and verify the output: one match followed by the final watermark
        final Queue<Object> output = testHarness.getOutput();
        assertEquals(2, output.size());
        verifyPattern(output.poll(), start, middle, end);
        verifyWatermark(output.poll(), Long.MAX_VALUE);
    } finally {
        // Closes whichever harness instance is current at this point.
        testHarness.close();
    }
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) Event(org.apache.flink.cep.Event) Map(java.util.Map) HashMap(java.util.HashMap) Watermark(org.apache.flink.streaming.api.watermark.Watermark) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)

Aggregations

OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)178 Test (org.junit.Test)142 Watermark (org.apache.flink.streaming.api.watermark.Watermark)52 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)37 RowData (org.apache.flink.table.data.RowData)31 ArrayList (java.util.ArrayList)28 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)25 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)23 Map (java.util.Map)22 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)21 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)19 HashMap (java.util.HashMap)18 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)16 Event (org.apache.flink.cep.Event)16 SubEvent (org.apache.flink.cep.SubEvent)16 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)15 GenericRowData (org.apache.flink.table.data.GenericRowData)15 Ignore (org.junit.Ignore)15 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)14