Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class GraphCreationWithCsvITCase, method testCreateWithOnlyEdgesCsvFile.
@Test
public void testCreateWithOnlyEdgesCsvFile() throws Exception {
    /*
     * Test with a single CSV file containing only edge data. Also exercises the
     * configuration methods ignoreFirstLineEdges() and ignoreCommentsVertices().
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String fileContent2 = "header\n1,2,ot\n" + "3,2,tt\n" + "3,1,to\n";
    final FileInputSplit split2 = createTempFile(fileContent2);
    Graph<Long, NullValue, String> graph = Graph.fromCsvReader(split2.getPath().toString(), env)
            .ignoreFirstLineEdges()
            .ignoreCommentsVertices("hi")
            .edgeTypes(Long.class, String.class);
    List<Triplet<Long, NullValue, String>> result = graph.getTriplets().collect();
    expectedResult = "1,2,(null),(null),ot\n" + "3,2,(null),(null),tt\n" + "3,1,(null),(null),to\n";
    compareResultAsTuples(result, expectedResult);
}
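This test (and testCsvWithNullEdge below) calls a createTempFile helper defined elsewhere in GraphCreationWithCsvITCase and not shown in this snippet. A minimal sketch of what such a helper could look like, writing the content to a temporary file and wrapping it in a single FileInputSplit; this is an assumption for illustration, not the actual Flink test code:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;

// Hypothetical sketch of the createTempFile helper used by the tests above.
private FileInputSplit createTempFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    // write the CSV content to the temporary file
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(content);
    }
    // one split covering the whole file, hosted locally
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[] { "localhost" });
}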
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class GraphCreationWithCsvITCase, method testCsvWithNullEdge.
@Test
public void testCsvWithNullEdge() throws Exception {
    /*
     * Tests fromCsvReader with both a vertex path and an edge path, where the
     * edges carry no value and therefore get the type NullValue.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final String vertexFileContent = "1,one\n" + "2,two\n" + "3,three\n";
    final String edgeFileContent = "1,2\n" + "3,2\n" + "3,1\n";
    final FileInputSplit split = createTempFile(vertexFileContent);
    final FileInputSplit edgeSplit = createTempFile(edgeFileContent);
    Graph<Long, String, NullValue> graph = Graph.fromCsvReader(split.getPath().toString(), edgeSplit.getPath().toString(), env)
            .vertexTypes(Long.class, String.class);
    List<Triplet<Long, String, NullValue>> result = graph.getTriplets().collect();
    expectedResult = "1,2,one,two,(null)\n" + "3,2,three,two,(null)\n" + "3,1,three,one,(null)\n";
    compareResultAsTuples(result, expectedResult);
}
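Here vertexTypes(Long.class, String.class) fixes the key and vertex value types while the edge value defaults to NullValue. If the edge file carried values as well, the reader's types(...) overload (taking key, vertex value, and edge value classes) should yield a fully typed graph; a hedged sketch with hypothetical file paths:

// Sketch under the assumption that both CSV files carry values; the paths are
// placeholders, not files from the original test.
Graph<Long, String, String> typedGraph = Graph.fromCsvReader("/tmp/vertices.csv", "/tmp/edges.csv", env)
        .types(Long.class, String.class, String.class);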
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class ReplicatingDataSourceTest, method checkJoinWithReplicatedSourceInputBehindMap.
/**
 * Tests a join program with a replicated data source behind a map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the join should have a FORWARD ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
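IdMap is a helper defined in ReplicatingDataSourceTest and not shown in this snippet. An identity MapFunction along the following lines would fit the test; this is a sketch, not necessarily the original definition:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple1;

// Identity mapper: forwards each record unchanged, so the replicated property
// of the source survives the map and the join inputs can stay FORWARD.
public static class IdMap implements MapFunction<Tuple1<String>, Tuple1<String>> {
    @Override
    public Tuple1<String> map(Tuple1<String> value) throws Exception {
        return value;
    }
}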
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class ReplicatingDataSourceTest, method checkJoinWithReplicatedSourceInputChangingparallelism.
/**
 * Tests that plan compilation fails for a join program with a replicated data source
 * whose parallelism is changed before the join: replication is only valid while the
 * parallelism stays unchanged between the source and the first join or cross.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.join(source2).where("*").equalTo("*")
            .setParallelism(DEFAULT_PARALLELISM + 2)
            .writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submitting the plan to the compiler must throw a CompilerException
    OptimizedPlan oPlan = compileNoStats(plan);
}
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class ReplicatingDataSourceTest, method checkJoinWithReplicatedSourceInputBehindFlatMap.
/**
 * Tests a join program with a replicated data source behind a flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.flatMap(new IdFlatMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the join should have a FORWARD ship strategy on both inputs
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
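Like IdMap, IdFlatMap is a helper from the test class; a plausible identity FlatMapFunction is sketched below (an assumption, not confirmed as the original):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.util.Collector;

// Identity flat-mapper: emits every record exactly once, unchanged.
public static class IdFlatMap implements FlatMapFunction<Tuple1<String>, Tuple1<String>> {
    @Override
    public void flatMap(Tuple1<String> value, Collector<Tuple1<String>> out) throws Exception {
        out.collect(value);
    }
}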