Search in sources :

Example 11 with TimestampedFileInputSplit

use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.

the class ContinuousFileProcessingRescalingTest method testReaderScalingUp.

/**
 * Checks that a snapshot taken from a single reader instance can be restored into two instances.
 *
 * <p>A first harness (1 task, index 0) receives two splits and is snapshotted once
 * {@code triggerLatch1} has fired. The snapshot is then restored into two further harnesses
 * (2 tasks, indices 0 and 1). Whatever the first harness emits after the snapshot is the expected
 * result; the output of the two restored harnesses, concatenated, must match it.
 *
 * @throws Exception if a harness operation or a latch wait fails
 */
@Test
public void testReaderScalingUp() throws Exception {
    // simulates the scenario of scaling up from 1 to 2 instances
    final OneShotLatch waitingLatch1 = new OneShotLatch();
    final OneShotLatch triggerLatch1 = new OneShotLatch();
    // NOTE(review): the trailing 20 and 5 look like "elements per split" and "element at which the
    // format blocks" -- confirm against BlockingFileInputFormat.
    BlockingFileInputFormat format1 = new BlockingFileInputFormat(triggerLatch1, waitingLatch1, new Path("test"), 20, 5);
    FileInputSplit[] splits = format1.createInputSplits(2);
    // the original, non-rescaled instance: 1 task, task index 0
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness1 = getTestHarness(format1, 1, 0);
    testHarness1.open();
    // hand both splits to the single instance
    testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[0])));
    testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(1, splits[1])));
    // wait until it reaches element 5
    if (!triggerLatch1.isTriggered()) {
        triggerLatch1.await();
    }
    // this will be the state shared by the 2 new instances.
    OperatorStateHandles snapshot = testHarness1.snapshot(0, 0);
    // 1) clear the output of the instance so that we can compare it with the one created by the new instances, and
    // 2) let the operator process the rest of its state
    testHarness1.getOutput().clear();
    waitingLatch1.trigger();
    // create the second instance and let it process the second split till element 15
    final OneShotLatch triggerLatch2 = new OneShotLatch();
    final OneShotLatch waitingLatch2 = new OneShotLatch();
    BlockingFileInputFormat format2 = new BlockingFileInputFormat(triggerLatch2, waitingLatch2, new Path("test"), 20, 15);
    // first of the two rescaled instances: 2 tasks, task index 0, restored from the snapshot
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness2 = getTestHarness(format2, 2, 0);
    testHarness2.setup();
    testHarness2.initializeState(snapshot);
    testHarness2.open();
    // second rescaled instance: 2 tasks, task index 1; it shares the same pair of latches as format2
    BlockingFileInputFormat format3 = new BlockingFileInputFormat(triggerLatch2, waitingLatch2, new Path("test"), 20, 15);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness3 = getTestHarness(format3, 2, 1);
    testHarness3.setup();
    testHarness3.initializeState(snapshot);
    testHarness3.open();
    // fire both shared latches up front; presumably this keeps the restored instances from blocking -- TODO confirm
    triggerLatch2.trigger();
    waitingLatch2.trigger();
    // and wait for the processing to finish
    synchronized (testHarness1.getCheckpointLock()) {
        testHarness1.close();
    }
    synchronized (testHarness2.getCheckpointLock()) {
        testHarness2.close();
    }
    synchronized (testHarness3.getCheckpointLock()) {
        testHarness3.close();
    }
    // expected: everything the original instance emitted after its output was cleared
    Queue<Object> expectedResult = new ArrayDeque<>();
    putElementsInQ(expectedResult, testHarness1.getOutput());
    // actual: output of the two restored instances, first instance's elements first
    Queue<Object> actualResult = new ArrayDeque<>();
    putElementsInQ(actualResult, testHarness2.getOutput());
    putElementsInQ(actualResult, testHarness3.getOutput());
    // presumably 2 splits x 20 elements minus the 5 emitted before the snapshot -- TODO confirm
    Assert.assertEquals(35, actualResult.size());
    Assert.assertArrayEquals(expectedResult.toArray(), actualResult.toArray());
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) ArrayDeque(java.util.ArrayDeque) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Example 12 with TimestampedFileInputSplit

use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.

the class ContinuousFileProcessingRescalingTest method getTestHarness.

/**
 * Builds a test harness around a {@link ContinuousFileReaderOperator} that reads with the given format.
 *
 * <p>The operator's output type is derived from the format, and the harness is switched to
 * event time before it is returned. The harness is neither set up nor opened here.
 *
 * @param format the input format the reader operator is created with
 * @param noOfTasks the number of tasks passed to the harness
 * @param taksIdx the task index passed to the harness
 * @return the configured, not yet opened harness
 * @throws Exception if creating or configuring the harness fails
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception {
    final ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
    readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig());

    // NOTE(review): the literal 10 is presumably the harness' max parallelism -- confirm against
    // the OneInputStreamOperatorTestHarness constructor.
    final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
        new OneInputStreamOperatorTestHarness<>(readerOperator, 10, noOfTasks, taksIdx);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    return harness;
}
Also used : TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)

Example 13 with TimestampedFileInputSplit

use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.

the class TimestampedFileInputSplitTest method testSplitComparison.

/**
 * Checks the ordering imposed by {@code TimestampedFileInputSplit#compareTo}.
 *
 * <p>The first constructor argument is the modification time and the second the split number.
 * The cases covered are: a smaller modification time sorts first, splits with the same
 * modification time are ordered by path, and splits of the same file with the same modification
 * time are ordered by split number.
 */
@Test
public void testSplitComparison() {
    TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit richFourthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
    TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);
    // differs from richSecondSplit only in its path, so that the path ordering is really exercised
    TimestampedFileInputSplit richSameModTimeOtherPathSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test3"), 0, 100, null);
    // smaller mod time
    Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0);
    // smaller modification time first (10 vs 11); the path is not what decides this comparison
    Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0);
    // same mod time and split number, so lexicographically on the path
    Assert.assertTrue(richSecondSplit.compareTo(richSameModTimeOtherPathSplit) < 0);
    // same mod time, same file so smaller split number first
    Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0);
    // smaller modification time first
    Assert.assertTrue(richThirdSplit.compareTo(richFourthSplit) < 0);
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) Test(org.junit.Test)

Example 14 with TimestampedFileInputSplit

use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.

the class TimestampedFileInputSplitTest method testSplitEquality.

/**
 * Checks {@code equals} on {@code TimestampedFileInputSplit}.
 *
 * <p>Splits built from identical constructor arguments must be equal. Splits that differ in
 * the first argument (the modification time) or in the path must not be.
 */
@Test
public void testSplitEquality() {
    TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    // identical arguments => equal
    Assert.assertEquals(richFirstSplit, richSecondSplit);
    // only the modification time differs => not equal
    TimestampedFileInputSplit richModSecondSplit = new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
    Assert.assertNotEquals(richSecondSplit, richModSecondSplit);
    TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    // JUnit expects (expected, actual); this order keeps the failure message meaningful
    Assert.assertEquals(10, richThirdSplit.getModificationTime());
    // only the path differs => not equal
    Assert.assertNotEquals(richFirstSplit, richThirdSplit);
    TimestampedFileInputSplit richThirdSplitCopy = new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) Test(org.junit.Test)

Aggregations

TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit)14 Path (org.apache.flink.core.fs.Path)13 Test (org.junit.Test)12 TextInputFormat (org.apache.flink.api.java.io.TextInputFormat)6 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)6 ContinuousFileReaderOperator (org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)5 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)5 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)4 ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction)4 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)4 OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles)4 ArrayList (java.util.ArrayList)3 ArrayDeque (java.util.ArrayDeque)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)2 Watermark (org.apache.flink.streaming.api.watermark.Watermark)2 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)2