use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
the class PropertyDataSourceTest method checkCoPartitionedSources2.
@Test
public void checkCoPartitionedSources2() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class);
data1.getSplitDataProperties().splitsPartitionedBy("byCountry", 0);
DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class);
data2.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(0).getSource();
SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(1).getSource();
GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
LocalProperties lprops1 = sourceNode1.getLocalProperties();
GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
LocalProperties lprops2 = sourceNode2.getLocalProperties();
Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0)));
Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
Assert.assertTrue(lprops1.getGroupedFields() == null);
Assert.assertTrue(lprops1.getOrdering() == null);
Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0)));
Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
Assert.assertTrue(lprops2.getGroupedFields() == null);
Assert.assertTrue(lprops2.getOrdering() == null);
Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner()));
}
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInputBehindMap.
/**
* Tests cross program with replicated data source behind map and filter.
*/
@Test
public void checkCrossWithReplicatedSourceInputBehindMap() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.map(new IdMap()).filter(new NoFilter()).cross(source2).writeAsText("/some/newpath");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when cross should have forward strategy on both sides
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();
ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInput.
/**
* Tests cross program with replicated data source.
*/
@Test
public void checkCrossWithReplicatedSourceInput() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.cross(source2).writeAsText("/some/newpath");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when cross should have forward strategy on both sides
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();
ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMapPartition.
/**
* Tests join program with replicated data source behind map partition.
*/
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.mapPartition(new IdPMap()).join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when join should have forward strategy on both sides
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInput.
/**
* Tests join program with replicated data source.
*/
@Test
public void checkJoinWithReplicatedSourceInput() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when join should have forward strategy on both sides
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Aggregations