use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
the class ContinuousFileProcessingRescalingTest method testReaderScalingDown.
@Test
public void testReaderScalingDown() throws Exception {
    // simulates the scenario of scaling down from 2 to 1 instances
    final OneShotLatch waitingLatch = new OneShotLatch();
    // create the first instance and let it process the first split till element 5
    final OneShotLatch triggerLatch1 = new OneShotLatch();
    BlockingFileInputFormat format1 = new BlockingFileInputFormat(triggerLatch1, waitingLatch, new Path("test"), 20, 5);
    FileInputSplit[] splits = format1.createInputSplits(2);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness1 = getTestHarness(format1, 2, 0);
    testHarness1.open();
    testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[0])));
    // wait until it arrives at element 5
    if (!triggerLatch1.isTriggered()) {
        triggerLatch1.await();
    }
    // create the second instance and let it process the second split till element 15
    final OneShotLatch triggerLatch2 = new OneShotLatch();
    BlockingFileInputFormat format2 = new BlockingFileInputFormat(triggerLatch2, waitingLatch, new Path("test"), 20, 15);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness2 = getTestHarness(format2, 2, 1);
    testHarness2.open();
    testHarness2.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[1])));
    // wait until it arrives at element 15
    if (!triggerLatch2.isTriggered()) {
        triggerLatch2.await();
    }
    // 1) clear the outputs of the two previous instances so that
    // we can compare their newly produced outputs with the merged one
    testHarness1.getOutput().clear();
    testHarness2.getOutput().clear();
    // 2) take the snapshots of the previous instances and merge them
    // into a new one, which is then used to initialize a third instance
    OperatorStateHandles mergedState = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
    // create the third instance
    final OneShotLatch wLatch = new OneShotLatch();
    final OneShotLatch tLatch = new OneShotLatch();
    BlockingFileInputFormat format = new BlockingFileInputFormat(wLatch, tLatch, new Path("test"), 20, 5);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness = getTestHarness(format, 1, 0);
    // initialize the state of the new operator with the state constructed by
    // combining the partial states of the instances above
    testHarness.initializeState(mergedState);
    testHarness.open();
    // now restart the waiting operators
    wLatch.trigger();
    tLatch.trigger();
    waitingLatch.trigger();
    // and wait for the processing to finish
    synchronized (testHarness1.getCheckpointLock()) {
        testHarness1.close();
    }
    synchronized (testHarness2.getCheckpointLock()) {
        testHarness2.close();
    }
    synchronized (testHarness.getCheckpointLock()) {
        testHarness.close();
    }
    Queue<Object> expectedResult = new ArrayDeque<>();
    putElementsInQ(expectedResult, testHarness1.getOutput());
    putElementsInQ(expectedResult, testHarness2.getOutput());
    Queue<Object> actualResult = new ArrayDeque<>();
    putElementsInQ(actualResult, testHarness.getOutput());
    Assert.assertEquals(20, actualResult.size());
    Assert.assertArrayEquals(expectedResult.toArray(), actualResult.toArray());
}
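The helpers getTestHarness, getTimestampedSplit, and putElementsInQ are not shown in this snippet. A minimal sketch of what putElementsInQ is assumed to do, namely copy the harness output into a plain queue while skipping watermarks so that only the emitted data records are compared:

// Assumed sketch of the putElementsInQ helper (not part of the snippet above).
private void putElementsInQ(Queue<Object> queue, Queue<Object> output) {
    for (Object element : output) {
        if (element instanceof Watermark) {
            // watermarks are irrelevant for comparing the produced records
            continue;
        }
        queue.add(element);
    }
}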
use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
the class GraphCreationWithCsvITCase method testCreateWithCsvFile.
@Test
public void testCreateWithCsvFile() throws Exception {
    /*
     * Test with two CSV files, one with vertex data and one with edge data.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent = "1,1\n" + "2,2\n" + "3,3\n";
    final FileInputSplit split = createTempFile(fileContent);
    final String fileContent2 = "1,2,ot\n" + "3,2,tt\n" + "3,1,to\n";
    final FileInputSplit split2 = createTempFile(fileContent2);
    Graph<Long, Long, String> graph = Graph.fromCsvReader(split.getPath().toString(), split2.getPath().toString(), env).types(Long.class, Long.class, String.class);
    List<Triplet<Long, Long, String>> result = graph.getTriplets().collect();
    expectedResult = "1,2,1,2,ot\n" + "3,2,3,2,tt\n" + "3,1,3,1,to\n";
    compareResultAsTuples(result, expectedResult);
}
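For reference (not part of the original test), the three arguments to types(Long.class, Long.class, String.class) correspond to the vertex ID, vertex value, and edge value types, so the graph built above exposes typed vertex and edge sets:

// Illustrative follow-up only; 'graph' is the Graph<Long, Long, String> created in the test above.
DataSet<Vertex<Long, Long>> vertices = graph.getVertices();
DataSet<Edge<Long, String>> edges = graph.getEdges();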
use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
the class GraphCreationWithCsvITCase method testCsvWithConstantValueMapper.
@Test
public void testCsvWithConstantValueMapper() throws Exception {
    /*
     * Test fromCsvReader with an edge path and a mapper that assigns a constant Double as the vertex value.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent = "1,2,ot\n" + "3,2,tt\n" + "3,1,to\n";
    final FileInputSplit split = createTempFile(fileContent);
    Graph<Long, Double, String> graph = Graph.fromCsvReader(split.getPath().toString(), new AssignDoubleValueMapper(), env).types(Long.class, Double.class, String.class);
    List<Triplet<Long, Double, String>> result = graph.getTriplets().collect();
    // graph.getTriplets().writeAsCsv(resultPath);
    expectedResult = "1,2,0.1,0.1,ot\n" + "3,1,0.1,0.1,to\n" + "3,2,0.1,0.1,tt\n";
    compareResultAsTuples(result, expectedResult);
}
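AssignDoubleValueMapper is not shown in this snippet. Based on the expected result, it is assumed to be a MapFunction that assigns the constant 0.1 to every vertex; a plausible sketch:

// Assumed sketch of AssignDoubleValueMapper: the Graph.fromCsvReader(edgePath, mapper, env)
// overload uses such a mapper to derive each vertex value from its ID; here it ignores the
// ID and returns the constant 0.1 seen in the expected result.
private static final class AssignDoubleValueMapper implements MapFunction<Long, Double> {
    @Override
    public Double map(Long vertexId) {
        return 0.1;
    }
}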
use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
the class GraphCreationWithCsvITCase method testCreateCsvFileDelimiterConfiguration.
@Test
public void testCreateCsvFileDelimiterConfiguration() throws Exception {
    /*
     * Test with an edge and a vertex CSV file. Tests the configuration methods fieldDelimiterEdges and
     * fieldDelimiterVertices, as well as lineDelimiterEdges and skipping the header lines.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent = "header\n1;1\n" + "2;2\n" + "3;3\n";
    final FileInputSplit split = createTempFile(fileContent);
    final String fileContent2 = "header|1:2:ot|" + "3:2:tt|" + "3:1:to|";
    final FileInputSplit split2 = createTempFile(fileContent2);
    Graph<Long, Long, String> graph = Graph.fromCsvReader(split.getPath().toString(), split2.getPath().toString(), env)
        .ignoreFirstLineEdges()
        .ignoreFirstLineVertices()
        .fieldDelimiterEdges(":")
        .fieldDelimiterVertices(";")
        .lineDelimiterEdges("|")
        .types(Long.class, Long.class, String.class);
    List<Triplet<Long, Long, String>> result = graph.getTriplets().collect();
    expectedResult = "1,2,1,2,ot\n" + "3,2,3,2,tt\n" + "3,1,3,1,to\n";
    compareResultAsTuples(result, expectedResult);
}
use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
the class GraphCreationWithCsvITCase method createTempFile.
private FileInputSplit createTempFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), Charset.forName("UTF-8"));
    wrt.write(content);
    wrt.close();
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[] { "localhost" });
}
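The FileInputSplit constructor used above takes the split number, the file path, the start offset and length of the split, and the hostnames holding the data. In the tests above only the path of the returned split is consumed, for example:

// Usage consistent with the tests above: only split.getPath() is used to point
// the CSV reader at the temporary file.
FileInputSplit vertexSplit = createTempFile("1,1\n2,2\n3,3\n");
String vertexPath = vertexSplit.getPath().toString();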