Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class DistinctCompilationTest, method testDistinctWithCombineHint.
@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.distinct()
                .setCombineHint(CombineHint.HASH)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check that the final reduce sorts while the combiner pre-reduces with a hash table
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
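DiscardingOutputFormat appears in every snippet on this page as a terminal sink that drops all records, which lets a program plan be constructed and compiled without doing any real I/O. As a minimal sketch of the idea (a hypothetical NoOpOutputFormat written against the public OutputFormat interface, not Flink's actual source):

import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.configuration.Configuration;

// Hypothetical no-op sink illustrating what a discarding output format
// boils down to: every lifecycle method does nothing, so records vanish.
public class NoOpOutputFormat<T> implements OutputFormat<T> {
    @Override
    public void configure(Configuration parameters) {
        // nothing to configure
    }

    @Override
    public void open(int taskNumber, int numTasks) {
        // no resources to open
    }

    @Override
    public void writeRecord(T record) {
        // drop the record
    }

    @Override
    public void close() {
        // nothing to close
    }
}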
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class ConnectedComponentsTest, method getConnectedComponentsPlan.
private static Plan getConnectedComponentsPlan(int parallelism, int iterations, boolean solutionSetFirst) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Long, Long>> verticesWithId = env.generateSequence(0, 1000)
            .name("Vertices")
            .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(Long value) {
                    return new Tuple2<Long, Long>(value, value);
                }
            })
            .name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId
            .iterateDelta(verticesWithId, iterations, 0)
            .name("Connected Components Iteration");

    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 0L)).name("Edges");

    DataSet<Tuple2<Long, Long>> minCandidateId = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .projectSecond(1).<Tuple2<Long, Long>>projectFirst(1)
            .name("Join Candidate Id With Neighbor")
            .groupBy(0).min(1)
            .name("Find Minimum Candidate Id");

    DataSet<Tuple2<Long, Long>> updateComponentId;
    if (solutionSetFirst) {
        updateComponentId = iteration.getSolutionSet()
                .join(minCandidateId).where(0).equalTo(0)
                .with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    @Override
                    public void join(Tuple2<Long, Long> current, Tuple2<Long, Long> candidate, Collector<Tuple2<Long, Long>> out) {
                        if (candidate.f1 < current.f1) {
                            out.collect(candidate);
                        }
                    }
                })
                .withForwardedFieldsFirst("0")
                .withForwardedFieldsSecond("0")
                .name("Update Component Id");
    } else {
        updateComponentId = minCandidateId
                .join(iteration.getSolutionSet()).where(0).equalTo(0)
                .with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    @Override
                    public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> current, Collector<Tuple2<Long, Long>> out) {
                        if (candidate.f1 < current.f1) {
                            out.collect(candidate);
                        }
                    }
                })
                .withForwardedFieldsFirst("0")
                .withForwardedFieldsSecond("0")
                .name("Update Component Id");
    }

    iteration.closeWith(updateComponentId, updateComponentId)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
            .name("Result");

    return env.createProgramPlan();
}
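A plan builder like this is typically exercised from individual test methods with both join orders. The following driver is a hypothetical sketch, assuming compileNoStats comes from the surrounding optimizer test base class:

@Test
public void testConnectedComponentsBothJoinOrders() {
    // hypothetical: build the plan both ways and check the optimizer accepts each variant
    Plan solutionSetFirst = getConnectedComponentsPlan(8, 100, true);
    Plan worksetFirst = getConnectedComponentsPlan(8, 100, false);

    compileNoStats(solutionSetFirst);
    compileNoStats(worksetFirst);
}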
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class DeltaIterationTranslationTest, method testCorrectTranslation.
@Test
public void testCorrectTranslation() {
    try {
        final String JOB_NAME = "Test JobName";
        final String ITERATION_NAME = "Test Name";
        final String BEFORE_NEXT_WORKSET_MAP = "Some Mapper";
        final String AGGREGATOR_NAME = "AggregatorName";
        final int[] ITERATION_KEYS = new int[] { 2 };
        final int NUM_ITERATIONS = 13;
        final int DEFAULT_parallelism = 133;
        final int ITERATION_parallelism = 77;

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // ------------ construct the test program ------------------
        {
            env.setParallelism(DEFAULT_parallelism);

            @SuppressWarnings("unchecked")
            DataSet<Tuple3<Double, Long, String>> initialSolutionSet =
                    env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));

            @SuppressWarnings("unchecked")
            DataSet<Tuple2<Double, String>> initialWorkSet =
                    env.fromElements(new Tuple2<Double, String>(1.23, "abc"));

            DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration =
                    initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
            iteration.name(ITERATION_NAME).parallelism(ITERATION_parallelism);
            iteration.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

            // test that multiple workset consumers are supported
            DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset()
                    .map(new IdentityMapper<Tuple2<Double, String>>())
                    .join(iteration.getWorkset()).where(1).equalTo(1)
                    .projectFirst(0, 1);

            DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin
                    .join(iteration.getSolutionSet()).where(1).equalTo(2)
                    .with(new SolutionWorksetJoin());

            DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(
                    joined,
                    joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));

            result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
            result.writeAsText("/dev/null");
        }

        Plan p = env.createProgramPlan(JOB_NAME);

        // ------------- validate the plan ----------------
        assertEquals(JOB_NAME, p.getJobName());
        assertEquals(DEFAULT_parallelism, p.getDefaultParallelism());

        // validate the iteration
        GenericDataSinkBase<?> sink1, sink2;
        {
            Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
            sink1 = sinks.next();
            sink2 = sinks.next();
        }

        DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();

        // check that multi-consumer translation works for iterations
        assertEquals(iteration, sink2.getInput());

        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, iteration.getMaximumNumberOfIterations());
        assertArrayEquals(ITERATION_KEYS, iteration.getSolutionSetKeyFields());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());

        MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
        InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
        InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
        MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();

        assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
        assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());

        if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
            WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
            assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
        } else {
            assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
        }

        assertEquals(BEFORE_NEXT_WORKSET_MAP, nextWorksetMapper.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
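The helpers IdentityMapper, NextWorksetMapper, and SolutionWorksetJoin are defined elsewhere in DeltaIterationTranslationTest. The following are plausible reconstructions that match the tuple types used above; they are sketches, not the verbatim Flink source:

// Hypothetical reconstructions of the test's helper functions.
public static class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value;
    }
}

public static class NextWorksetMapper implements MapFunction<Tuple3<Double, Long, String>, Tuple2<Double, String>> {
    @Override
    public Tuple2<Double, String> map(Tuple3<Double, Long, String> value) {
        // assumed projection from the solution-set type to the workset type
        return new Tuple2<Double, String>(value.f0, value.f2);
    }
}

public static class SolutionWorksetJoin
        implements JoinFunction<Tuple2<Double, String>, Tuple3<Double, Long, String>, Tuple3<Double, Long, String>> {
    @Override
    public Tuple3<Double, Long, String> join(Tuple2<Double, String> workset, Tuple3<Double, Long, String> solution) {
        // assumed: pass the solution-set record through as the delta
        return solution;
    }
}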
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class DataExchangeModeOpenBranchingTest, method verifyBranchigPlan.
private void verifyBranchigPlan(
        ExecutionMode execMode,
        DataExchangeMode toMap,
        DataExchangeMode toFilter,
        DataExchangeMode toFilterSink,
        DataExchangeMode toJoin1,
        DataExchangeMode toJoin2,
        DataExchangeMode toJoinSink,
        DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);

        DataSet<Tuple2<Long, Long>> data = env.generateSequence(1, 100000)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return new Tuple2<Long, Long>(value, value);
                    }
                });

        // output 1 goes through a filter
        data.filter(new FilterFunction<Tuple2<Long, Long>>() {
                @Override
                public boolean filter(Tuple2<Long, Long> value) {
                    return false;
                }
            })
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
            .name("sink1");

        // output 2 goes through a join before the sink
        data.join(env.fromElements(new Tuple2<Long, Long>(1L, 2L)))
            .where(1).equalTo(0)
            .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
            .name("sink2");

        // output 3 is direct
        data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");

        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());

        SinkPlanNode filterSink = findSink(optPlan.getDataSinks(), "sink1");
        SinkPlanNode joinSink = findSink(optPlan.getDataSinks(), "sink2");
        SinkPlanNode directSink = findSink(optPlan.getDataSinks(), "sink3");

        SingleInputPlanNode filterNode = (SingleInputPlanNode) filterSink.getPredecessor();
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        DualInputPlanNode joinNode = (DualInputPlanNode) joinSink.getPredecessor();

        // check wiring
        assertEquals(mapNode, joinNode.getInput1().getSource());
        assertEquals(mapNode, directSink.getPredecessor());

        // check the exchange modes into the sinks
        assertEquals(toFilterSink, filterSink.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, joinSink.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, directSink.getInput().getDataExchangeMode());

        // check the exchange modes between the operators
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
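A verification method like this is driven by small test cases that pin down the expected exchange mode for every connection in the branching plan. A hypothetical invocation (the concrete expected modes are illustrative, not the values asserted in the real test suite, though under BATCH_FORCED one would expect every exchange to be batched):

@Test
public void testBranchingPlanBatchForced() {
    // hypothetical expectation: BATCH_FORCED batches every data exchange
    verifyBranchigPlan(ExecutionMode.BATCH_FORCED,
            DataExchangeMode.BATCH, DataExchangeMode.BATCH, DataExchangeMode.BATCH,
            DataExchangeMode.BATCH, DataExchangeMode.BATCH, DataExchangeMode.BATCH,
            DataExchangeMode.BATCH);
}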
Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.
The class ReduceCompilationTest, method testAllReduceWithCombiner.
@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");

        data.reduce(new RichReduceFunction<Long>() {
                @Override
                public Long reduce(Long value1, Long value2) {
                    return value1 + value2;
                }
            })
            .name("reducer")
            .output(new DiscardingOutputFormat<Long>())
            .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());

        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
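Outside of the optimizer test harness, the same shape of program can be run directly. A minimal standalone sketch (hypothetical driver code, not part of the test; the job name is made up):

public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);

    env.generateSequence(1, 8000000)
            // as asserted above, the optimizer places a combiner at
            // parallelism 8 in front of the final all-reduce, which
            // collapses to a single task
            .reduce(new ReduceFunction<Long>() {
                @Override
                public Long reduce(Long value1, Long value2) {
                    return value1 + value2;
                }
            })
            .output(new DiscardingOutputFormat<Long>());

    env.execute("all-reduce with combiner");
}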