use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class DeltaIterationTranslationTest method testCorrectTranslation.
@Test
public void testCorrectTranslation() {
try {
final String jobName = "Test JobName";
final String iterationName = "Test Name";
final String beforeNextWorksetMap = "Some Mapper";
final String aggregatorName = "AggregatorName";
final int[] iterationKeys = new int[] { 2 };
final int numIterations = 13;
final int defaultParallelism = 133;
final int iterationParallelism = 77;
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// ------------ construct the test program ------------------
{
env.setParallelism(defaultParallelism);
@SuppressWarnings("unchecked") DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
@SuppressWarnings("unchecked") DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, numIterations, iterationKeys);
iteration.name(iterationName).parallelism(iterationParallelism);
iteration.registerAggregator(aggregatorName, new LongSumAggregator());
// test that multiple workset consumers are supported
DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset().map(new IdentityMapper<Tuple2<Double, String>>()).join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1);
DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());
DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(joined, joined.map(new NextWorksetMapper()).name(beforeNextWorksetMap));
result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
result.writeAsText("/dev/null");
}
Plan p = env.createProgramPlan(jobName);
// ------------- validate the plan ----------------
assertEquals(jobName, p.getJobName());
assertEquals(defaultParallelism, p.getDefaultParallelism());
// validate the iteration
GenericDataSinkBase<?> sink1, sink2;
{
Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
sink1 = sinks.next();
sink2 = sinks.next();
}
DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();
// check that multi consumer translation works for iterations
assertEquals(iteration, sink2.getInput());
// check the basic iteration properties
assertEquals(numIterations, iteration.getMaximumNumberOfIterations());
assertArrayEquals(iterationKeys, iteration.getSolutionSetKeyFields());
assertEquals(iterationParallelism, iteration.getParallelism());
assertEquals(iterationName, iteration.getName());
MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();
assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());
if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
} else {
assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
}
assertEquals(beforeNextWorksetMap, nextWorksetMapper.getName());
assertEquals(aggregatorName, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class PregelCompilerTest method testPregelCompilerWithBroadcastVariable.
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
final String broadcastSetName = "broadcast";
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
// compose test program
{
DataSet<Long> bcVar = env.fromElements(1L);
DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
}
});
Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
VertexCentricConfiguration parameters = new VertexCentricConfiguration();
parameters.addBroadcastSet(broadcastSetName, bcVar);
DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), null, 100, parameters).getVertices();
result.output(new DiscardingOutputFormat<>());
}
Plan p = env.createProgramPlan("Pregel Connected Components");
OptimizedPlan op = compileNoStats(p);
// check the sink
SinkPlanNode sink = op.getDataSinks().iterator().next();
assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
// check the iteration
WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
// check the solution set delta
PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
assertTrue(ssDelta instanceof SingleInputPlanNode);
SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
// check the computation coGroup
DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
// check that the initial partitioning is pushed out of the loop
assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ReduceCompilationTest method testGroupedReduceWithSelectorFunctionKey.
@Test
public void testGroupedReduceWithSelectorFunctionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {
public String getKey(Tuple2<String, Double> value) {
return value.f0;
}
}).reduce(new RichReduceFunction<Tuple2<String, Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
return null;
}
}).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class ReduceCompilationTest method testAllReduceWithCombiner.
@Test
public void testAllReduceWithCombiner() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
data.reduce(new RichReduceFunction<Long>() {
@Override
public Long reduce(Long value1, Long value2) {
return value1 + value2;
}
}).name("reducer").output(new DiscardingOutputFormat<Long>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, combineNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check that both reduce and combiner have the same strategy
assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
// check parallelism
assertEquals(8, sourceNode.getParallelism());
assertEquals(8, combineNode.getParallelism());
assertEquals(1, reduceNode.getParallelism());
assertEquals(1, sinkNode.getParallelism());
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
the class DataExchangeModeOpenBranchingTest method verifyBranchigPlan.
private void verifyBranchigPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toFilterSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toJoinSink, DataExchangeMode toDirectSink) {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setExecutionMode(execMode);
DataSet<Tuple2<Long, Long>> data = env.generateSequence(1, 100000).map(new MapFunction<Long, Tuple2<Long, Long>>() {
@Override
public Tuple2<Long, Long> map(Long value) {
return new Tuple2<Long, Long>(value, value);
}
});
// output 1
data.filter(new FilterFunction<Tuple2<Long, Long>>() {
@Override
public boolean filter(Tuple2<Long, Long> value) {
return false;
}
}).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink1");
// output 2 does a join before a join
data.join(env.fromElements(new Tuple2<Long, Long>(1L, 2L))).where(1).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>()).name("sink2");
// output 3 is direct
data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");
OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
SinkPlanNode filterSink = findSink(optPlan.getDataSinks(), "sink1");
SinkPlanNode joinSink = findSink(optPlan.getDataSinks(), "sink2");
SinkPlanNode directSink = findSink(optPlan.getDataSinks(), "sink3");
SingleInputPlanNode filterNode = (SingleInputPlanNode) filterSink.getPredecessor();
SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
DualInputPlanNode joinNode = (DualInputPlanNode) joinSink.getPredecessor();
assertEquals(mapNode, joinNode.getInput1().getSource());
assertEquals(mapNode, directSink.getPredecessor());
assertEquals(toFilterSink, filterSink.getInput().getDataExchangeMode());
assertEquals(toJoinSink, joinSink.getInput().getDataExchangeMode());
assertEquals(toDirectSink, directSink.getInput().getDataExchangeMode());
assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Aggregations