Search in sources :

Example 1 with ContinuousFileReaderOperator

Use of org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator in the Apache Flink project (flink by apache).

From the class ContinuousFileProcessingITCase, the method testProgram.

//						END OF PREPARATIONS
@Override
protected void testProgram() throws Exception {
    /*
		* This test checks the interplay between the monitor and the reader
		* and also the failExternally() functionality. To test the latter we
		* set the parallelism to 1 so that we have the chaining between the sink,
		* which throws the SuccessException to signal the end of the test, and the
		* reader.
		* */
    // Input format reading text files from the test HDFS cluster.
    TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    format.setFilePath(hdfsURI);
    format.setFilesFilter(FilePathFilter.createDefaultFilter());
    // create the stream execution environment with a parallelism > 1 to test
    // that monitoring runs with DOP 1 while the readers run in parallel
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, env.getParallelism(), INTERVAL);
    // the monitor has always DOP 1
    DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
    Assert.assertEquals(1, splits.getParallelism());
    ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
    TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
    // the readers can be multiple
    DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
    Assert.assertEquals(PARALLELISM, content.getParallelism());
    // finally for the sink we set the parallelism to 1 so that we can verify the output
    TestingSinkFunction sink = new TestingSinkFunction();
    content.addSink(sink).setParallelism(1);
    // env.execute() blocks until the job terminates, so it runs on a separate
    // thread while the main thread creates the input files below.
    Thread job = new Thread() {

        @Override
        public void run() {
            try {
                env.execute("ContinuousFileProcessingITCase Job.");
            } catch (Exception e) {
                // The sink signals test completion by throwing SuccessException,
                // which surfaces here wrapped in the job-failure exception.
                // Walk the cause chain (bounded to 20 levels) to find it.
                Throwable th = e;
                for (int depth = 0; depth < 20; depth++) {
                    if (th instanceof SuccessException) {
                        try {
                            postSubmit();
                        } catch (Exception e1) {
                            e1.printStackTrace();
                        }
                        return;
                    } else if (th.getCause() != null) {
                        th = th.getCause();
                    } else {
                        break;
                    }
                }
                // No SuccessException in the chain: this is a real failure.
                e.printStackTrace();
                Assert.fail(e.getMessage());
            }
        }
    };
    job.start();
    // The modification time of the last created file.
    long lastCreatedModTime = Long.MIN_VALUE;
    // create the files to be read; each file must get a strictly greater
    // modification timestamp than the previous one (retried in the do/while)
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
        long modTime;
        do {
            // give it some time so that the files have
            // different modification timestamps.
            Thread.sleep(50);
            tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");
            modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
            if (modTime <= lastCreatedModTime) {
                // delete the last created file to recreate it with a different timestamp
                hdfs.delete(tmpFile.f0, false);
            }
        } while (modTime <= lastCreatedModTime);
        lastCreatedModTime = modTime;
        // put the contents in the expected results list before the reader picks them
        // this is to guarantee that they are in before the reader finishes (avoid race conditions)
        expectedContents.put(i, tmpFile.f1);
        // rename the temp file to its final, monitored name
        org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
        hdfs.rename(tmpFile.f0, file);
        Assert.assertTrue(hdfs.exists(file));
    }
    // wait for the job to finish.
    job.join();
}
Also used : TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) Path(org.apache.flink.core.fs.Path) ContinuousFileMonitoringFunction(org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction) IOException(java.io.IOException) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator)

Example 2 with ContinuousFileReaderOperator

use of org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator in project flink by apache.

the class ContinuousFileProcessingMigrationTest method testReaderSnapshotRestore.

//						END OF PREPARATIONS
//						TESTS
@Test
public void testReaderSnapshotRestore() throws Exception {
    /*
	 The commented-out code below is what was originally run (against Flink 1.1)
	 to generate the legacy snapshot file restored further down
	 ("reader-migration-test-flink1.1-snapshot"). It is kept for reference so the
	 snapshot can be regenerated.

		FileInputSplit split1 =
			new FileInputSplit(3, new Path("test/test1"), 0, 100, null);
		FileInputSplit split2 =
			new FileInputSplit(2, new Path("test/test2"), 101, 200, null);
		FileInputSplit split3 =
			new FileInputSplit(1, new Path("test/test2"), 0, 100, null);
		FileInputSplit split4 =
			new FileInputSplit(0, new Path("test/test3"), 0, 100, null);

		final OneShotLatch latch = new OneShotLatch();
		BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
		TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
		ContinuousFileReaderOperator<FileInputSplit, ?> initReader = new ContinuousFileReaderOperator<>(format);
		initReader.setOutputType(typeInfo, new ExecutionConfig());
		OneInputStreamOperatorTestHarness<FileInputSplit, FileInputSplit> initTestInstance =
			new OneInputStreamOperatorTestHarness<>(initReader);
		initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
		initTestInstance.open();
		// create some state in the reader
		initTestInstance.processElement(new StreamRecord<>(split1));
		initTestInstance.processElement(new StreamRecord<>(split2));
		initTestInstance.processElement(new StreamRecord<>(split3));
		initTestInstance.processElement(new StreamRecord<>(split4));
		// take a snapshot of the operator's state. This will be used
		// to initialize another reader and compare the results of the
		// two operators.
		final StreamTaskState snapshot;
		synchronized (initTestInstance.getCheckpointLock()) {
			snapshot = initTestInstance.snapshot(0L, 0L);
		}

		initTestInstance.snaphotToFile(snapshot, "src/test/resources/reader-migration-test-flink1.1-snapshot");

		*/
    // These splits mirror the ones used when the legacy snapshot was generated
    // (see the commented-out code above), with the modification timestamps added.
    TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
    TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
    initReader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance = new OneInputStreamOperatorTestHarness<>(initReader);
    initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
    initTestInstance.setup();
    // restore the operator state from the Flink 1.1 snapshot file before opening
    initTestInstance.initializeStateFromLegacyCheckpoint(getResourceFilename("reader-migration-test-flink1.1-snapshot"));
    initTestInstance.open();
    // let the blocking input format proceed so the restored splits can be read
    latch.trigger();
    synchronized (initTestInstance.getCheckpointLock()) {
        initTestInstance.close();
    }
    FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
    FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
    FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
    FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);
    // compare if the results contain what they should contain and also if
    // they are the same, as they should.
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
    Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) Test(org.junit.Test)

Example 3 with ContinuousFileReaderOperator

use of org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator in project flink by apache.

the class ContinuousFileProcessingTest method testFileReadingOperatorWithEventTime.

/**
 * Event-time test for the ContinuousFileReaderOperator: feeds all splits of the
 * created test files to the operator and checks that (a) every line of every file
 * is emitted exactly once and (b) a single final Long.MAX_VALUE watermark is
 * emitted on close(), marking the end of the input stream.
 */
@Test
public void testFileReadingOperatorWithEventTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Map<String, Long> modTimes = new HashMap<>();
    Map<Integer, String> expectedFileContents = new HashMap<>();
    // create NO_OF_FILES test files and remember their contents and modification times
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        modTimes.put(file.f0.getName(), hdfs.getFileStatus(file.f0).getModificationTime());
        filesCreated.add(file.f0);
        expectedFileContents.put(i, file.f1);
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
    ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
    reader.setOutputType(typeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> tester = new OneInputStreamOperatorTestHarness<>(reader);
    tester.setTimeCharacteristic(TimeCharacteristic.EventTime);
    tester.open();
    // create the necessary splits for the test
    FileInputSplit[] splits = format.createInputSplits(reader.getRuntimeContext().getNumberOfParallelSubtasks());
    // and feed them to the operator
    for (FileInputSplit split : splits) {
        tester.processElement(new StreamRecord<>(new TimestampedFileInputSplit(modTimes.get(split.getPath().getName()), split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())));
    }
    // then close the reader gracefully (and wait to finish reading)
    synchronized (tester.getCheckpointLock()) {
        tester.close();
    }
    // the lines received must be the elements in the files, +1 for the Long.MAX_VALUE
    // watermark. We are in event time, where this operator emits no periodic watermarks,
    // so the single last watermark marks the end of the input stream.
    Assert.assertEquals(NO_OF_FILES * LINES_PER_FILE + 1, tester.getOutput().size());
    Map<Integer, List<String>> actualFileContents = new HashMap<>();
    Object lastElement = null;
    for (Object line : tester.getOutput()) {
        lastElement = line;
        if (line instanceof StreamRecord) {
            @SuppressWarnings("unchecked") StreamRecord<String> element = (StreamRecord<String>) line;
            // the index of the source file is encoded in the first character of
            // each line (see createFileAndFillWithData)
            int fileIdx = Character.getNumericValue(element.getValue().charAt(0));
            List<String> content = actualFileContents.get(fileIdx);
            if (content == null) {
                content = new ArrayList<>();
                actualFileContents.put(fileIdx, content);
            }
            content.add(element.getValue() + "\n");
        }
    }
    // check if the last element is the LongMax watermark
    Assert.assertTrue(lastElement instanceof Watermark);
    Assert.assertEquals(Long.MAX_VALUE, ((Watermark) lastElement).getTimestamp());
    // reassemble the per-file contents and compare against the expected ones
    Assert.assertEquals(expectedFileContents.size(), actualFileContents.size());
    for (Integer fileIdx : expectedFileContents.keySet()) {
        Assert.assertTrue("file" + fileIdx + " not found", actualFileContents.containsKey(fileIdx));
        // lines may arrive out of order; sort them by line number before comparing
        List<String> cntnt = actualFileContents.get(fileIdx);
        Collections.sort(cntnt, new Comparator<String>() {

            @Override
            public int compare(String o1, String o2) {
                return getLineNo(o1) - getLineNo(o2);
            }
        });
        StringBuilder cntntStr = new StringBuilder();
        for (String line : cntnt) {
            cntntStr.append(line);
        }
        Assert.assertEquals(expectedFileContents.get(fileIdx), cntntStr.toString());
    }
    // clean up the test files
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
}
Also used : TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) HashMap(java.util.HashMap) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) Path(org.apache.flink.core.fs.Path) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 4 with ContinuousFileReaderOperator

use of org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator in project flink by apache.

the class ContinuousFileProcessingTest method testFileReadingOperatorWithIngestionTime.

/**
 * Ingestion-time test for the ContinuousFileReaderOperator: checks that periodic
 * watermarks are emitted at the configured interval, that every emitted record is
 * stamped with the current processing time, that all lines of all files are read,
 * and that a final Long.MAX_VALUE watermark is emitted when the operator closes.
 */
@Test
public void testFileReadingOperatorWithIngestionTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Map<Integer, String> expectedFileContents = new HashMap<>();
    Map<String, Long> modTimes = new HashMap<>();
    // create NO_OF_FILES test files and remember their contents and modification times
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated.add(file.f0);
        modTimes.put(file.f0.getName(), hdfs.getFileStatus(file.f0).getModificationTime());
        expectedFileContents.put(i, file.f1);
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
    final long watermarkInterval = 10;
    ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
    final OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> tester = new OneInputStreamOperatorTestHarness<>(reader);
    tester.getExecutionConfig().setAutoWatermarkInterval(watermarkInterval);
    tester.setTimeCharacteristic(TimeCharacteristic.IngestionTime);
    reader.setOutputType(typeInfo, tester.getExecutionConfig());
    tester.open();
    Assert.assertEquals(TimeCharacteristic.IngestionTime, tester.getTimeCharacteristic());
    // test that watermarks are correctly emitted: advancing processing time past a
    // multiple of the interval must produce exactly one watermark at that multiple
    ConcurrentLinkedQueue<Object> output = tester.getOutput();
    tester.setProcessingTime(201);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(200, ((Watermark) output.poll()).getTimestamp());
    tester.setProcessingTime(301);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(300, ((Watermark) output.poll()).getTimestamp());
    tester.setProcessingTime(401);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(400, ((Watermark) output.poll()).getTimestamp());
    tester.setProcessingTime(501);
    Assert.assertTrue(output.peek() instanceof Watermark);
    Assert.assertEquals(500, ((Watermark) output.poll()).getTimestamp());
    Assert.assertTrue(output.isEmpty());
    // create the necessary splits for the test
    FileInputSplit[] splits = format.createInputSplits(reader.getRuntimeContext().getNumberOfParallelSubtasks());
    // and feed them to the operator, one split at a time
    Map<Integer, List<String>> actualFileContents = new HashMap<>();
    long lastSeenWatermark = Long.MIN_VALUE;
    // counter for the lines read from the splits
    int lineCounter = 0;
    int watermarkCounter = 0;
    for (FileInputSplit split : splits) {
        // set the next "current processing time".
        long nextTimestamp = tester.getProcessingTime() + watermarkInterval;
        tester.setProcessingTime(nextTimestamp);
        // send the next split to be read and wait until it is fully read, the +1 is for the watermark.
        tester.processElement(new StreamRecord<>(new TimestampedFileInputSplit(modTimes.get(split.getPath().getName()), split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())));
        // busy-wait until the reader has emitted all lines of the split plus the one
        // watermark for this processing-time advance (test-only synchronization; the
        // operator reads the split asynchronously)
        while (tester.getOutput().size() != (LINES_PER_FILE + 1)) {
            Thread.sleep(10);
        }
        // verify that the results are the expected
        for (Object line : tester.getOutput()) {
            if (line instanceof StreamRecord) {
                @SuppressWarnings("unchecked") StreamRecord<String> element = (StreamRecord<String>) line;
                lineCounter++;
                // in ingestion time every record is stamped with the current processing time
                Assert.assertEquals(nextTimestamp, element.getTimestamp());
                // the index of the source file is encoded in the first character of
                // each line (see createFileAndFillWithData)
                int fileIdx = Character.getNumericValue(element.getValue().charAt(0));
                List<String> content = actualFileContents.get(fileIdx);
                if (content == null) {
                    content = new ArrayList<>();
                    actualFileContents.put(fileIdx, content);
                }
                content.add(element.getValue() + "\n");
            } else if (line instanceof Watermark) {
                // watermarks must be aligned to multiples of the interval and increase
                long watermark = ((Watermark) line).getTimestamp();
                Assert.assertEquals(nextTimestamp - (nextTimestamp % watermarkInterval), watermark);
                Assert.assertTrue(watermark > lastSeenWatermark);
                watermarkCounter++;
                lastSeenWatermark = watermark;
            } else {
                Assert.fail("Unknown element in the list.");
            }
        }
        // clean the output to be ready for the next split
        tester.getOutput().clear();
    }
    // now we are processing one split after the other,
    // so all the elements must be here by now.
    Assert.assertEquals(NO_OF_FILES * LINES_PER_FILE, lineCounter);
    // because we expect one watermark per split.
    Assert.assertEquals(splits.length, watermarkCounter);
    // then close the reader gracefully so that the Long.MAX watermark is emitted
    synchronized (tester.getCheckpointLock()) {
        tester.close();
    }
    // clean up the test files
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
    // check if the last element is the LongMax watermark (by now this must be the only element)
    Assert.assertEquals(1, tester.getOutput().size());
    Assert.assertTrue(tester.getOutput().peek() instanceof Watermark);
    Assert.assertEquals(Long.MAX_VALUE, ((Watermark) tester.getOutput().poll()).getTimestamp());
    // check if the elements are the expected ones.
    Assert.assertEquals(expectedFileContents.size(), actualFileContents.size());
    for (Integer fileIdx : expectedFileContents.keySet()) {
        Assert.assertTrue("file" + fileIdx + " not found", actualFileContents.containsKey(fileIdx));
        // lines may arrive out of order; sort them by line number before comparing
        List<String> cntnt = actualFileContents.get(fileIdx);
        Collections.sort(cntnt, new Comparator<String>() {

            @Override
            public int compare(String o1, String o2) {
                return getLineNo(o1) - getLineNo(o2);
            }
        });
        StringBuilder cntntStr = new StringBuilder();
        for (String line : cntnt) {
            cntntStr.append(line);
        }
        Assert.assertEquals(expectedFileContents.get(fileIdx), cntntStr.toString());
    }
}
Also used : TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) Path(org.apache.flink.core.fs.Path) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 5 with ContinuousFileReaderOperator

use of org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator in project flink by apache.

the class ContinuousFileProcessingTest method testReaderSnapshotRestore.

/**
 * Snapshots a reader that holds unread splits in its state, restores a second
 * reader from that snapshot, and checks that both readers emit exactly the
 * same output once they are allowed to proceed.
 */
@Test
public void testReaderSnapshotRestore() throws Exception {
    String basePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    // The splits that will be buffered in the first reader's state.
    TimestampedFileInputSplit firstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
    TimestampedFileInputSplit secondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
    TimestampedFileInputSplit thirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
    TimestampedFileInputSplit fourthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

    // An input format that blocks on the latch, keeping the splits in state
    // until the latch is triggered further down.
    final OneShotLatch latch = new OneShotLatch();
    BlockingFileInputFormat blockingFormat = new BlockingFileInputFormat(latch, new Path(basePath));
    TypeInformation<FileInputSplit> splitTypeInfo = TypeExtractor.getInputFormatTypes(blockingFormat);

    // First reader: receives the four splits and is snapshotted while blocked.
    ContinuousFileReaderOperator<FileInputSplit> firstReader = new ContinuousFileReaderOperator<>(blockingFormat);
    firstReader.setOutputType(splitTypeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> firstHarness = new OneInputStreamOperatorTestHarness<>(firstReader);
    firstHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    firstHarness.open();

    // Create some state in the reader.
    firstHarness.processElement(new StreamRecord<>(firstSplit));
    firstHarness.processElement(new StreamRecord<>(secondSplit));
    firstHarness.processElement(new StreamRecord<>(thirdSplit));
    firstHarness.processElement(new StreamRecord<>(fourthSplit));

    // Snapshot the operator's state; it will seed a second reader whose output
    // must match the first one's.
    final OperatorStateHandles stateSnapshot;
    synchronized (firstHarness.getCheckpointLock()) {
        stateSnapshot = firstHarness.snapshot(0L, 0L);
    }

    // Second reader: initialized from the snapshot before being opened.
    ContinuousFileReaderOperator<FileInputSplit> secondReader = new ContinuousFileReaderOperator<>(new BlockingFileInputFormat(latch, new Path(basePath)));
    secondReader.setOutputType(splitTypeInfo, new ExecutionConfig());
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> secondHarness = new OneInputStreamOperatorTestHarness<>(secondReader);
    secondHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    secondHarness.initializeState(stateSnapshot);
    secondHarness.open();

    // Now let computation start: unblock both readers and drain their state.
    latch.trigger();
    synchronized (firstHarness.getCheckpointLock()) {
        firstHarness.close();
    }
    synchronized (secondHarness.getCheckpointLock()) {
        secondHarness.close();
    }

    FileInputSplit plainSplit1 = createSplitFromTimestampedSplit(firstSplit);
    FileInputSplit plainSplit2 = createSplitFromTimestampedSplit(secondSplit);
    FileInputSplit plainSplit3 = createSplitFromTimestampedSplit(thirdSplit);
    FileInputSplit plainSplit4 = createSplitFromTimestampedSplit(fourthSplit);

    // The first reader must have emitted every split, and the restored reader
    // must have produced exactly the same output.
    Assert.assertTrue(firstHarness.getOutput().contains(new StreamRecord<>(plainSplit1)));
    Assert.assertTrue(firstHarness.getOutput().contains(new StreamRecord<>(plainSplit2)));
    Assert.assertTrue(firstHarness.getOutput().contains(new StreamRecord<>(plainSplit3)));
    Assert.assertTrue(firstHarness.getOutput().contains(new StreamRecord<>(plainSplit4)));
    Assert.assertArrayEquals(firstHarness.getOutput().toArray(), secondHarness.getOutput().toArray());
}
Also used : Path(org.apache.flink.core.fs.Path) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ContinuousFileReaderOperator(org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator) Test(org.junit.Test)

Aggregations

ContinuousFileReaderOperator (org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator)6 TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit)6 Path (org.apache.flink.core.fs.Path)5 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)5 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)4 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)4 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)4 Test (org.junit.Test)4 TextInputFormat (org.apache.flink.api.java.io.TextInputFormat)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)2 Watermark (org.apache.flink.streaming.api.watermark.Watermark)2 IOException (java.io.IOException)1 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)1 ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction)1 OperatorStateHandles (org.apache.flink.streaming.runtime.tasks.OperatorStateHandles)1