Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
The class ContinuousFileMonitoringFunction, method getInputSplitsSortedByModTime.
/**
 * Creates the input splits to be forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
 * being forwarded, and only splits belonging to files in the {@code eligibleFiles}
 * map will be processed.
 *
 * @param eligibleFiles The files to process.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(Map<Path, FileStatus> eligibleFiles) throws IOException {
    Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        return splitsByModTime;
    }
    for (FileInputSplit split : format.createInputSplits(readerParallelism)) {
        FileStatus fileStatus = eligibleFiles.get(split.getPath());
        if (fileStatus != null) {
            Long modTime = fileStatus.getModificationTime();
            List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
            if (splitsToForward == null) {
                splitsToForward = new ArrayList<>();
                splitsByModTime.put(modTime, splitsToForward);
            }
            splitsToForward.add(new TimestampedFileInputSplit(
                modTime, split.getSplitNumber(), split.getPath(),
                split.getStart(), split.getLength(), split.getHostnames()));
        }
    }
    return splitsByModTime;
}
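Because splitsByModTime is a TreeMap keyed by modification time, iterating the returned map yields the split lists in ascending modification-time order. On Java 8+, the get/put grouping above could be written more compactly with computeIfAbsent; a stylistic alternative, not the code Flink ships:

// Equivalent grouping step using Map.computeIfAbsent (Java 8+);
// all names and signatures are taken from the method above:
splitsByModTime
    .computeIfAbsent(modTime, k -> new ArrayList<>())
    .add(new TimestampedFileInputSplit(
        modTime, split.getSplitNumber(), split.getPath(),
        split.getStart(), split.getLength(), split.getHostnames()));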
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
The class ContinuousFileProcessingRescalingTest, method testReaderScalingDown.
@Test
public void testReaderScalingDown() throws Exception {
    // simulates the scenario of scaling down from 2 to 1 instances
    final OneShotLatch waitingLatch = new OneShotLatch();
    // create the first instance and let it process the first split till element 5
    final OneShotLatch triggerLatch1 = new OneShotLatch();
    BlockingFileInputFormat format1 = new BlockingFileInputFormat(triggerLatch1, waitingLatch, new Path("test"), 20, 5);
    FileInputSplit[] splits = format1.createInputSplits(2);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness1 = getTestHarness(format1, 2, 0);
    testHarness1.open();
    testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[0])));
    // wait until it arrives at element 5
    if (!triggerLatch1.isTriggered()) {
        triggerLatch1.await();
    }
    // create the second instance and let it process the second split till element 15
    final OneShotLatch triggerLatch2 = new OneShotLatch();
    BlockingFileInputFormat format2 = new BlockingFileInputFormat(triggerLatch2, waitingLatch, new Path("test"), 20, 15);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness2 = getTestHarness(format2, 2, 1);
    testHarness2.open();
    testHarness2.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[1])));
    // wait until it arrives at element 15
    if (!triggerLatch2.isTriggered()) {
        triggerLatch2.await();
    }
    // 1) clear the outputs of the two previous instances so that
    // we can compare their newly produced outputs with the merged one
    testHarness1.getOutput().clear();
    testHarness2.getOutput().clear();
    // 2) take the snapshots of the previous instances and merge them
    // into a new one, which will then be used to initialize a third instance
    OperatorStateHandles mergedState = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    // create the third instance
    final OneShotLatch wLatch = new OneShotLatch();
    final OneShotLatch tLatch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(wLatch, tLatch, new Path("test"), 20, 5);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness = getTestHarness(format, 1, 0);
    // initialize the state of the new operator with the state constructed by
    // combining the partial states of the instances above
    testHarness.initializeState(mergedState);
    testHarness.open();
    // now restart the waiting operators
    wLatch.trigger();
    tLatch.trigger();
    waitingLatch.trigger();
    // and wait for the processing to finish
    synchronized (testHarness1.getCheckpointLock()) {
        testHarness1.close();
    }
    synchronized (testHarness2.getCheckpointLock()) {
        testHarness2.close();
    }
    synchronized (testHarness.getCheckpointLock()) {
        testHarness.close();
    }
    Queue<Object> expectedResult = new ArrayDeque<>();
    putElementsInQ(expectedResult, testHarness1.getOutput());
    putElementsInQ(expectedResult, testHarness2.getOutput());
    Queue<Object> actualResult = new ArrayDeque<>();
    putElementsInQ(actualResult, testHarness.getOutput());
    Assert.assertEquals(20, actualResult.size());
    Assert.assertArrayEquals(expectedResult.toArray(), actualResult.toArray());
}
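The test relies on getTestHarness and getTimestampedSplit helpers defined elsewhere in ContinuousFileProcessingRescalingTest. A plausible sketch of getTimestampedSplit, assuming it merely wraps the plain split with the given modification time (the TimestampedFileInputSplit constructor signature is the one used in the monitoring function above):

private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    // attach a modification time so the split can be ordered and checkpointed
    return new TimestampedFileInputSplit(
        modTime, split.getSplitNumber(), split.getPath(),
        split.getStart(), split.getLength(), split.getHostnames());
}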
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
The class PrimitiveInputFormatTest, method testIntegerInput.
@Test
public void testIntegerInput() throws IOException {
    try {
        final String fileContent = "111|222|";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.class);
        format.configure(new Configuration());
        format.open(split);
        Integer result = null;
        result = format.nextRecord(result);
        assertEquals(Integer.valueOf(111), result);
        result = format.nextRecord(result);
        assertEquals(Integer.valueOf(222), result);
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
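All three PrimitiveInputFormatTest cases go through a createInputSplit helper that is not shown here. A minimal sketch of what such a helper presumably does, writing the content to a temporary file and returning one FileInputSplit covering it (the temp-file prefix and host list are illustrative):

private FileInputSplit createInputSplit(String content) throws IOException {
    // persist the test content to a throwaway file
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(content);
    }
    // a single split spanning the whole file
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
        tempFile.length(), new String[] {"localhost"});
}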
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
The class PrimitiveInputFormatTest, method testDoubleInputLinewise.
@Test
public void testDoubleInputLinewise() throws IOException {
    try {
        final String fileContent = "1.21\n2.23\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Double> format = new PrimitiveInputFormat<Double>(PATH, Double.class);
        format.configure(new Configuration());
        format.open(split);
        Double result = null;
        result = format.nextRecord(result);
        assertEquals(Double.valueOf(1.21), result);
        result = format.nextRecord(result);
        assertEquals(Double.valueOf(2.23), result);
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
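Outside the test harness, the same format plugs into a batch program. A minimal sketch of the job wiring, assuming a hypothetical file of newline-separated doubles (the path is illustrative; createInput with an explicit TypeInformation is the standard DataSet API call):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// read one Double per line from the (hypothetical) input file
DataSet<Double> doubles = env.createInput(
    new PrimitiveInputFormat<>(new Path("file:///tmp/doubles.txt"), Double.class),
    BasicTypeInfo.DOUBLE_TYPE_INFO);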
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
The class PrimitiveInputFormatTest, method testRemovingTrailingCR.
@Test
public void testRemovingTrailingCR() {
    try {
        String first = "First line";
        String second = "Second line";
        String fileContent = first + "\r\n" + second + "\r\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<String> format = new PrimitiveInputFormat<String>(PATH, String.class);
        format.configure(new Configuration());
        format.open(split);
        String result = null;
        result = format.nextRecord(result);
        assertEquals(first, result);
        result = format.nextRecord(result);
        assertEquals(second, result);
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
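The trailing carriage return is dropped by the format's record parsing: when the delimiter is a single '\n' and the raw record ends in '\r', the last byte is excluded before the field parser runs. A hedged paraphrase of that check (variable names follow the usual DelimitedInputFormat conventions; not a verbatim copy of the Flink source):

// inside readRecord(...): trim a trailing '\r' when splitting on '\n'
if (delimiter.length == 1 && delimiter[0] == '\n'
        && numBytes >= 1 && bytes[offset + numBytes - 1] == '\r') {
    numBytes -= 1; // parse the record without the carriage return
}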