
Example 6 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

The class ContinuousFileMonitoringFunction, method getInputSplitsSortedByModTime.

/**
 * Creates the input splits to be forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
 * being forwarded and only splits belonging to files in the {@code eligibleFiles}
 * list will be processed.
 *
 * @param eligibleFiles The files to process.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(Map<Path, FileStatus> eligibleFiles) throws IOException {
    Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        return splitsByModTime;
    }
    for (FileInputSplit split : format.createInputSplits(readerParallelism)) {
        FileStatus fileStatus = eligibleFiles.get(split.getPath());
        if (fileStatus != null) {
            Long modTime = fileStatus.getModificationTime();
            List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
            if (splitsToForward == null) {
                splitsToForward = new ArrayList<>();
                splitsByModTime.put(modTime, splitsToForward);
            }
            splitsToForward.add(new TimestampedFileInputSplit(modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames()));
        }
    }
    return splitsByModTime;
}
Also used: FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileStatus(org.apache.flink.core.fs.FileStatus) ArrayList(java.util.ArrayList) List(java.util.List) TreeMap(java.util.TreeMap)
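
Because splitsByModTime is a TreeMap keyed by modification time, iterating over its entries yields the splits in ascending mod-time order. Below is a minimal consumption sketch; the forwardSplitsInModTimeOrder helper and the Consumer callback are illustrative assumptions, not part of ContinuousFileMonitoringFunction (the real function hands the splits to the source context):

// Hypothetical helper: walk the TreeMap in ascending modification-time order and
// pass every split to the supplied callback.
// Assumed imports: java.util.List, java.util.Map, java.util.function.Consumer,
// org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit.
private void forwardSplitsInModTimeOrder(Map<Long, List<TimestampedFileInputSplit>> splitsByModTime, Consumer<TimestampedFileInputSplit> forward) {
    for (Map.Entry<Long, List<TimestampedFileInputSplit>> entry : splitsByModTime.entrySet()) {
        for (TimestampedFileInputSplit split : entry.getValue()) {
            // in the real operator this is where the split would be emitted downstream
            forward.accept(split);
        }
    }
}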

Example 7 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

The class ContinuousFileProcessingRescalingTest, method testReaderScalingDown.

@Test
public void testReaderScalingDown() throws Exception {
    // simulates the scenario of scaling down from 2 to 1 instances
    final OneShotLatch waitingLatch = new OneShotLatch();
    // create the first instance and let it process the first split till element 5
    final OneShotLatch triggerLatch1 = new OneShotLatch();
    BlockingFileInputFormat format1 = new BlockingFileInputFormat(triggerLatch1, waitingLatch, new Path("test"), 20, 5);
    FileInputSplit[] splits = format1.createInputSplits(2);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness1 = getTestHarness(format1, 2, 0);
    testHarness1.open();
    testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[0])));
    // wait until it arrives at element 5
    if (!triggerLatch1.isTriggered()) {
        triggerLatch1.await();
    }
    // create the second instance and let it process the second split till element 15
    final OneShotLatch triggerLatch2 = new OneShotLatch();
    BlockingFileInputFormat format2 = new BlockingFileInputFormat(triggerLatch2, waitingLatch, new Path("test"), 20, 15);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness2 = getTestHarness(format2, 2, 1);
    testHarness2.open();
    testHarness2.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[1])));
    // wait until it arrives at element 15
    if (!triggerLatch2.isTriggered()) {
        triggerLatch2.await();
    }
    // 1) clear the outputs of the two previous instances so that
    // we can compare their newly produced outputs with the merged one
    testHarness1.getOutput().clear();
    testHarness2.getOutput().clear();
    // 2) and take the snapshots from the previous instances and merge them
    // into a new one which will be then used to initialize a third instance
    OperatorStateHandles mergedState = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    // create the third instance
    final OneShotLatch wLatch = new OneShotLatch();
    final OneShotLatch tLatch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(wLatch, tLatch, new Path("test"), 20, 5);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness = getTestHarness(format, 1, 0);
    // initialize the state of the new operator with the one constructed by
    // combining the partial states of the instances above.
    testHarness.initializeState(mergedState);
    testHarness.open();
    // now restart the waiting operators
    wLatch.trigger();
    tLatch.trigger();
    waitingLatch.trigger();
    // and wait for the processing to finish
    synchronized (testHarness1.getCheckpointLock()) {
        testHarness1.close();
    }
    synchronized (testHarness2.getCheckpointLock()) {
        testHarness2.close();
    }
    synchronized (testHarness.getCheckpointLock()) {
        testHarness.close();
    }
    Queue<Object> expectedResult = new ArrayDeque<>();
    putElementsInQ(expectedResult, testHarness1.getOutput());
    putElementsInQ(expectedResult, testHarness2.getOutput());
    Queue<Object> actualResult = new ArrayDeque<>();
    putElementsInQ(actualResult, testHarness.getOutput());
    Assert.assertEquals(20, actualResult.size());
    Assert.assertArrayEquals(expectedResult.toArray(), actualResult.toArray());
}
Also used: Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) ArrayDeque(java.util.ArrayDeque) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) OperatorStateHandles(org.apache.flink.streaming.runtime.tasks.OperatorStateHandles) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)
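
The test coordinates its reader threads with OneShotLatch from org.apache.flink.core.testutils: trigger() releases every current and future await() call, and isTriggered() lets callers skip the wait if the signal already fired. A minimal sketch of that handshake in isolation (class and variable names here are illustrative, not taken from the test):

// Stand-alone sketch of the OneShotLatch handshake used by the rescaling test.
import org.apache.flink.core.testutils.OneShotLatch;

public class LatchHandshakeSketch {
    public static void main(String[] args) throws InterruptedException {
        final OneShotLatch reachedTarget = new OneShotLatch();

        Thread reader = new Thread(() -> {
            // ... process elements; once the target element is reached, signal the main thread
            reachedTarget.trigger();
        });
        reader.start();

        // mirrors the triggerLatch1.await() call in the test: block until the reader signals
        if (!reachedTarget.isTriggered()) {
            reachedTarget.await();
        }
        reader.join();
    }
}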

Example 8 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

The class PrimitiveInputFormatTest, method testIntegerInput.

@Test
public void testIntegerInput() throws IOException {
    try {
        final String fileContent = "111|222|";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.class);
        format.configure(new Configuration());
        format.open(split);
        Integer result = null;
        result = format.nextRecord(result);
        assertEquals(Integer.valueOf(111), result);
        result = format.nextRecord(result);
        assertEquals(Integer.valueOf(222), result);
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Also used: FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) Test(org.junit.Test)
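
The createInputSplit(String) helper is not shown in this excerpt. A plausible sketch, assuming it writes the content to a temporary file and wraps the whole file in a single FileInputSplit (the test's actual helper may differ):

// Assumed shape of the helper, not the test's actual implementation.
// Assumed imports: java.io.File, java.io.FileOutputStream, java.io.OutputStreamWriter,
// java.nio.charset.StandardCharsets, org.apache.flink.core.fs.FileInputSplit, org.apache.flink.core.fs.Path.
private FileInputSplit createInputSplit(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", ".tmp");
    tempFile.deleteOnExit();
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        writer.write(content);
    }
    // one split covering the entire file, hosted locally
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[] {"localhost"});
}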

Example 9 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

The class PrimitiveInputFormatTest, method testDoubleInputLinewise.

@Test
public void testDoubleInputLinewise() throws IOException {
    try {
        final String fileContent = "1.21\n2.23\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Double> format = new PrimitiveInputFormat<Double>(PATH, Double.class);
        format.configure(new Configuration());
        format.open(split);
        Double result = null;
        result = format.nextRecord(result);
        assertEquals(Double.valueOf(1.21), result);
        result = format.nextRecord(result);
        assertEquals(Double.valueOf(2.23), result);
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Also used: FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) Test(org.junit.Test)
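
The open/nextRecord/reachedEnd/close life-cycle shown in these tests applies to any FileInputFormat. A generic drain loop, sketched here as an assumed helper (configure(...) is expected to have been called on the format beforehand):

// Generic sketch: read every record from one split into a list.
// Assumed imports: java.util.ArrayList, java.util.List, org.apache.flink.api.common.io.FileInputFormat.
private <T> List<T> readAll(FileInputFormat<T> format, FileInputSplit split) throws IOException {
    List<T> records = new ArrayList<>();
    format.open(split);
    try {
        T reuse = null;
        while (!format.reachedEnd()) {
            T record = format.nextRecord(reuse);
            if (record == null) {
                // delimited formats may signal the end with a null record before reachedEnd() flips
                break;
            }
            records.add(record);
            reuse = record;
        }
    } finally {
        format.close();
    }
    return records;
}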

Example 10 with FileInputSplit

Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

The class PrimitiveInputFormatTest, method testRemovingTrailingCR.

@Test
public void testRemovingTrailingCR() {
    try {
        String first = "First line";
        String second = "Second line";
        String fileContent = first + "\r\n" + second + "\r\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<String> format = new PrimitiveInputFormat<String>(PATH, String.class);
        format.configure(new Configuration());
        format.open(split);
        String result = null;
        result = format.nextRecord(result);
        assertEquals(first, result);
        result = format.nextRecord(result);
        assertEquals(second, result);
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Also used: FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) Test(org.junit.Test)
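
Tying the pieces together, a hedged usage sketch that exercises the same CRLF-stripping behaviour through the helpers sketched earlier (createInputSplit and readAll are assumptions introduced above, not part of the original test class):

// Usage sketch; assumes the createInputSplit and readAll helpers from the earlier sketches,
// plus java.util.Arrays and java.util.List.
FileInputSplit crlfSplit = createInputSplit("First line\r\nSecond line\r\n");
PrimitiveInputFormat<String> crlfFormat = new PrimitiveInputFormat<String>(PATH, String.class);
crlfFormat.configure(new Configuration());
List<String> lines = readAll(crlfFormat, crlfSplit);
// the trailing '\r' of each record is stripped by the delimiter handling
assertEquals(Arrays.asList("First line", "Second line"), lines);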

Aggregations

FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 140
Test (org.junit.Test): 119
Configuration (org.apache.flink.configuration.Configuration): 93
Path (org.apache.flink.core.fs.Path): 59
IOException (java.io.IOException): 45
File (java.io.File): 36
FileOutputStream (java.io.FileOutputStream): 23
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 20
Row (org.apache.flink.types.Row): 20
OutputStreamWriter (java.io.OutputStreamWriter): 18
ParseException (org.apache.flink.api.common.io.ParseException): 17
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 17
DoubleValue (org.apache.flink.types.DoubleValue): 17
IntValue (org.apache.flink.types.IntValue): 17
LongValue (org.apache.flink.types.LongValue): 17
StringValue (org.apache.flink.types.StringValue): 17
Value (org.apache.flink.types.Value): 17
Plan (org.apache.flink.api.common.Plan): 12
ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat): 12
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 12