use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class DistinctCompilationTest method testDistinctWithSelectorFunctionKey.
/**
 * Compiles a {@code distinct} keyed by a selector function and verifies the optimized plan.
 *
 * <p>The assertions expect the chain
 * source -> key extractor -> combiner -> reducer -> key projector -> sink,
 * a sorted reduce preceded by a sorted partial reduce (the combiner), key field 0 on both,
 * and parallelism 6 up to the combiner and 8 from the reducer onwards.
 */
@Test
public void testDistinctWithSelectorFunctionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the program is only compiled in this test, never executed
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.distinct(new KeySelector<Tuple2<String, Double>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Double> value) {
                        return value.f0;
                    }
                })
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        // keyProjector was taken from the sink's input above, so comparing it with that same
        // expression proves nothing; verify instead that the reducer feeds the projector
        assertEquals(reduceNode, keyProjector.getInput().getSource());

        // check the driver strategies of the reducer and its combiner
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism: 6 (set on the source) up to the combiner, 8 (environment default) after
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ReduceCompilationTest method testGroupedReduceWithFieldPositionKey.
/**
 * Compiles a reduce grouped on tuple field 1 and inspects the resulting optimized plan:
 * the combiner must sit between source and reducer, both must use the sorted strategies
 * on key field 1, and the parallelism must be 6 before and 8 after the shuffle.
 */
@Test
public void testGroupedReduceWithFieldPositionKey() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(8);

        final DataSet<Tuple2<String, Double>> input =
                environment.readCsvFile("file:///will/never/be/read")
                        .types(String.class, Double.class)
                        .name("source")
                        .setParallelism(6);

        // the function body is irrelevant here; the program is only compiled
        final RichReduceFunction<Tuple2<String, Double>> noOpReducer =
                new RichReduceFunction<Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> reduce(Tuple2<String, Double> left, Tuple2<String, Double> right) {
                        return null;
                    }
                };

        final DataSet<Tuple2<String, Double>> reduced = input.groupBy(1).reduce(noOpReducer).name("reducer");
        reduced.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        final Plan plan = environment.createProgramPlan();
        final OptimizedPlan optimizedPlan = compileNoStats(plan);
        final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimizedPlan);

        // look up the named nodes
        final SourcePlanNode source = nodes.getNode("source");
        final SingleInputPlanNode reducer = nodes.getNode("reducer");
        final SinkPlanNode sink = nodes.getNode("sink");

        // the combiner is whatever feeds the reducer
        final SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // wiring: source -> combiner, reducer -> sink
        assertEquals(source, combiner.getInput().getSource());
        assertEquals(reducer, sink.getInput().getSource());

        // driver strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reducer.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combiner.getDriverStrategy());

        // key fields
        assertEquals(new FieldList(1), reducer.getKeys(0));
        assertEquals(new FieldList(1), combiner.getKeys(0));
        assertEquals(new FieldList(1), reducer.getInput().getLocalStrategyKeys());

        // parallelism
        assertEquals(6, source.getParallelism());
        assertEquals(6, combiner.getParallelism());
        assertEquals(8, reducer.getParallelism());
        assertEquals(8, sink.getParallelism());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        fail(ex.getClass().getSimpleName() + " in test: " + ex.getMessage());
    }
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ReduceCompilationTest method testAllReduceNoCombiner.
/**
 * Compiles a non-grouped (all-)reduce over a {@code fromElements} source and checks that the
 * optimizer wires the source directly into the reducer, i.e. without a combiner in between,
 * and that source, reducer and sink all run with parallelism 1 even though the environment
 * default is set to 8.
 */
@Test
public void testAllReduceNoCombiner() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(8);

        final DataSet<Double> input = environment.fromElements(0.2, 0.3, 0.4, 0.5).name("source");

        final RichReduceFunction<Double> sum = new RichReduceFunction<Double>() {
            @Override
            public Double reduce(Double left, Double right) {
                return left + right;
            }
        };

        final DataSet<Double> reduced = input.reduce(sum).name("reducer");
        reduced.output(new DiscardingOutputFormat<Double>()).name("sink");

        final Plan plan = environment.createProgramPlan();
        final OptimizedPlan optimizedPlan = compileNoStats(plan);
        final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimizedPlan);

        // with an input of parallelism one, the all-reduce gets no combiner
        final SourcePlanNode source = nodes.getNode("source");
        final SingleInputPlanNode reducer = nodes.getNode("reducer");
        final SinkPlanNode sink = nodes.getNode("sink");

        // wiring: source -> reducer -> sink
        assertEquals(source, reducer.getInput().getSource());
        assertEquals(reducer, sink.getInput().getSource());

        // parallelism
        assertEquals(1, source.getParallelism());
        assertEquals(1, reducer.getParallelism());
        assertEquals(1, sink.getParallelism());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        fail(ex.getClass().getSimpleName() + " in test: " + ex.getMessage());
    }
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ReduceCompilationTest method testGroupedReduceWithHint.
/**
 * Compiles a selector-keyed grouped reduce with {@link CombineHint#HASH} and verifies the
 * optimized plan.
 *
 * <p>The assertions expect the chain
 * source -> key extractor -> combiner -> reducer -> key projector -> sink,
 * a hashed partial reduce for the combiner while the final reduce stays sorted,
 * key field 0 on both, and parallelism 6 up to the combiner and 8 from the reducer onwards.
 */
@Test
public void testGroupedReduceWithHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the program is only compiled in this test, never executed
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Double> value) {
                        return value.f0;
                    }
                })
                .reduce(new RichReduceFunction<Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                        // the function body is irrelevant; only the plan is inspected
                        return null;
                    }
                })
                .setCombineHint(CombineHint.HASH)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        // keyProjector was taken from the sink's input above, so comparing it with that same
        // expression proves nothing; verify instead that the reducer feeds the projector
        assertEquals(reduceNode, keyProjector.getInput().getSource());

        // check the strategies: the hint only switches the combiner to the hashed variant
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism: 6 (set on the source) up to the combiner, 8 (environment default) after
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class DataExchangeModeClosedBranchingTest method verifyBranchingJoiningPlan.
/**
 * Builds a branching and re-joining program (map -> reduce / filter -> join -> flatMap /
 * coGroup, with three sinks), compiles it under the given execution mode, and asserts the
 * data exchange mode on every connection of the optimized plan.
 *
 * @param execMode the execution mode set on the environment's config before compilation
 * @param toMap expected exchange mode on the input of the map
 * @param toReduceCombiner expected exchange mode on the input of the node preceding the reducer
 * @param toReduce expected exchange mode on the input of the reducer
 * @param toFilter expected exchange mode on the input of the filter
 * @param toReduceSink expected exchange mode on the input of the sink named "reduceSink"
 * @param toJoin1 expected exchange mode on the first input of the join
 * @param toJoin2 expected exchange mode on the second input of the join
 * @param toOtherReduceCombiner expected exchange mode on the input of the node preceding the
 *     group-reduce that feeds the co-group
 * @param toOtherReduce expected exchange mode on the input of that group-reduce
 * @param toFlatMap expected exchange mode on the input of the flat map
 * @param toFlatMapSink expected exchange mode on the input of the sink named "flatMapSink"
 * @param toCoGroup1 expected exchange mode on the first input of the co-group
 * @param toCoGroup2 expected exchange mode on the second input of the co-group
 * @param toCoGroupSink expected exchange mode on the input of the sink named "cgSink"
 */
private void verifyBranchingJoiningPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toReduceCombiner, DataExchangeMode toReduce, DataExchangeMode toFilter, DataExchangeMode toReduceSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toOtherReduceCombiner, DataExchangeMode toOtherReduce, DataExchangeMode toFlatMap, DataExchangeMode toFlatMapSink, DataExchangeMode toCoGroup1, DataExchangeMode toCoGroup2, DataExchangeMode toCoGroupSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);

        // ---- build the program ----
        DataSet<Tuple2<Long, Long>> data = env.fromElements(33L, 44L).map(new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });

        // first branch: grouped reduce, consumed by a sink and by the join
        DataSet<Tuple2<Long, Long>> reduced = data.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reduced.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("reduceSink");

        // second branch: filter, consumed by the join and by the group-reduce below
        DataSet<Tuple2<Long, Long>> filtered = data.filter(new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        });

        // the branches are joined again; the join result itself branches into flatMap and coGroup
        DataSet<Tuple2<Long, Long>> joined = reduced.join(filtered).where(1).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        joined.flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("flatMapSink");
        joined.coGroup(filtered.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>())).where(0).equalTo(0).with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>()).name("cgSink");

        // ---- compile and locate the plan nodes, walking backwards from the sinks ----
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode reduceSink = findSink(optPlan.getDataSinks(), "reduceSink");
        SinkPlanNode flatMapSink = findSink(optPlan.getDataSinks(), "flatMapSink");
        SinkPlanNode cgSink = findSink(optPlan.getDataSinks(), "cgSink");

        DualInputPlanNode coGroupNode = (DualInputPlanNode) cgSink.getPredecessor();
        DualInputPlanNode joinNode = (DualInputPlanNode) coGroupNode.getInput1().getSource();
        SingleInputPlanNode otherReduceNode = (SingleInputPlanNode) coGroupNode.getInput2().getSource();
        SingleInputPlanNode otherReduceCombinerNode = (SingleInputPlanNode) otherReduceNode.getPredecessor();

        SingleInputPlanNode reduceNode = (SingleInputPlanNode) joinNode.getInput1().getSource();
        SingleInputPlanNode reduceCombinerNode = (SingleInputPlanNode) reduceNode.getPredecessor();
        // the reducer reached via the join must be the same node that feeds the reduce sink
        assertEquals(reduceNode, reduceSink.getPredecessor());

        SingleInputPlanNode filterNode = (SingleInputPlanNode) joinNode.getInput2().getSource();
        // the filter reached via the join must also feed the other reduce's combiner
        assertEquals(filterNode, otherReduceCombinerNode.getPredecessor());

        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        // the map feeds both the filter and the first reduce's combiner
        assertEquals(mapNode, reduceCombinerNode.getPredecessor());

        SingleInputPlanNode flatMapNode = (SingleInputPlanNode) flatMapSink.getPredecessor();
        assertEquals(joinNode, flatMapNode.getPredecessor());

        // verify the data exchange modes
        assertEquals(toReduceSink, reduceSink.getInput().getDataExchangeMode());
        assertEquals(toFlatMapSink, flatMapSink.getInput().getDataExchangeMode());
        assertEquals(toCoGroupSink, cgSink.getInput().getDataExchangeMode());
        assertEquals(toCoGroup1, coGroupNode.getInput1().getDataExchangeMode());
        assertEquals(toCoGroup2, coGroupNode.getInput2().getDataExchangeMode());
        assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
        assertEquals(toOtherReduce, otherReduceNode.getInput().getDataExchangeMode());
        assertEquals(toOtherReduceCombiner, otherReduceCombinerNode.getInput().getDataExchangeMode());
        assertEquals(toFlatMap, flatMapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduceNode.getInput().getDataExchangeMode());
        assertEquals(toReduceCombiner, reduceCombinerNode.getInput().getDataExchangeMode());
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        // include the exception type: getMessage() alone may be null (e.g. for a
        // NullPointerException), which would leave the failure without any description
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Aggregations