Example 11 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

From the class DistinctCompilationTest, method testDistinctWithCombineHint:

@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.distinct().setCombineHint(CombineHint.HASH).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check the driver strategies of reduce and combiner (they differ under the HASH hint)
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), FieldList (org.apache.flink.api.common.operators.util.FieldList), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Test (org.junit.Test)
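
Taken out of the test harness, the pattern above boils down to a few lines. The following is a minimal, hedged sketch assuming the Flink 1.x DataSet API; the class name, sample data, and job name are invented for illustration. DiscardingOutputFormat simply swallows every record, which is why these tests can build and compile plans without any real I/O.

import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class DistinctCombineHintSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(Tuple2.of("a", 1.0), Tuple2.of("a", 1.0), Tuple2.of("b", 2.0))
                // deduplicate on all tuple fields, pre-aggregating with a hash table
                .distinct().setCombineHint(CombineHint.HASH)
                // a sink that drops every record: no files, no side effects
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>());
        env.execute("distinct-with-combine-hint sketch");
    }
}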

Example 12 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

From the class ConnectedComponentsTest, method getConnectedComponentsPlan:

private static Plan getConnectedComponentsPlan(int parallelism, int iterations, boolean solutionSetFirst) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    DataSet<Tuple2<Long, Long>> verticesWithId = env.generateSequence(0, 1000).name("Vertices").map(new MapFunction<Long, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> map(Long value) {
            return new Tuple2<Long, Long>(value, value);
        }
    }).name("Assign Vertex Ids");
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, iterations, 0).name("Connected Components Iteration");
    @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 0L)).name("Edges");
    DataSet<Tuple2<Long, Long>> minCandidateId = iteration.getWorkset().join(edges).where(0).equalTo(0).projectSecond(1).<Tuple2<Long, Long>>projectFirst(1).name("Join Candidate Id With Neighbor").groupBy(0).min(1).name("Find Minimum Candidate Id");
    DataSet<Tuple2<Long, Long>> updateComponentId;
    if (solutionSetFirst) {
        updateComponentId = iteration.getSolutionSet().join(minCandidateId).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void join(Tuple2<Long, Long> current, Tuple2<Long, Long> candidate, Collector<Tuple2<Long, Long>> out) {
                if (candidate.f1 < current.f1) {
                    out.collect(candidate);
                }
            }
        }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    } else {
        updateComponentId = minCandidateId.join(iteration.getSolutionSet()).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> current, Collector<Tuple2<Long, Long>> out) {
                if (candidate.f1 < current.f1) {
                    out.collect(candidate);
                }
            }
        }).withForwardedFieldsFirst("0").withForwardedFieldsSecond("0").name("Update Component Id");
    }
    iteration.closeWith(updateComponentId, updateComponentId).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("Result");
    return env.createProgramPlan();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) MapFunction(org.apache.flink.api.common.functions.MapFunction) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat)
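
Stripped of the connected-components logic, the loop construction reduces to the skeleton below: a hedged sketch assuming the usual DataSet API imports plus org.apache.flink.api.java.DataSet and org.apache.flink.api.java.operators.DeltaIteration. The identity delta stands in for the real join/groupBy/min pipeline.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> vertices = env.fromElements(Tuple2.of(0L, 0L), Tuple2.of(1L, 1L));

// iterateDelta(initialWorkset, maxIterations, keyPosition) opens the loop
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop =
        vertices.iterateDelta(vertices, 10, 0);

// the per-superstep work goes here; the identity is enough for a skeleton
DataSet<Tuple2<Long, Long>> delta = loop.getWorkset();

// closeWith(solutionSetDelta, nextWorkset) closes the loop and yields the final solution set
loop.closeWith(delta, delta).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
env.execute("delta-iteration skeleton");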

Example 13 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

From the class DeltaIterationTranslationTest, method testCorrectTranslation:

@Test
public void testCorrectTranslation() {
    try {
        final String JOB_NAME = "Test JobName";
        final String ITERATION_NAME = "Test Name";
        final String BEFORE_NEXT_WORKSET_MAP = "Some Mapper";
        final String AGGREGATOR_NAME = "AggregatorName";
        final int[] ITERATION_KEYS = new int[] { 2 };
        final int NUM_ITERATIONS = 13;
        final int DEFAULT_parallelism = 133;
        final int ITERATION_parallelism = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // ------------ construct the test program ------------------
        {
            env.setParallelism(DEFAULT_parallelism);
            @SuppressWarnings("unchecked") DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
            @SuppressWarnings("unchecked") DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
            DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
            iteration.name(ITERATION_NAME).parallelism(ITERATION_parallelism);
            iteration.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            // test that multiple workset consumers are supported
            DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset().map(new IdentityMapper<Tuple2<Double, String>>()).join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1);
            DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());
            DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(joined, joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));
            result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
            result.writeAsText("/dev/null");
        }
        Plan p = env.createProgramPlan(JOB_NAME);
        // ------------- validate the plan ----------------
        assertEquals(JOB_NAME, p.getJobName());
        assertEquals(DEFAULT_parallelism, p.getDefaultParallelism());
        // validate the iteration
        GenericDataSinkBase<?> sink1, sink2;
        {
            Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
            sink1 = sinks.next();
            sink2 = sinks.next();
        }
        DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();
        // check that multi consumer translation works for iterations
        assertEquals(iteration, sink2.getInput());
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, iteration.getMaximumNumberOfIterations());
        assertArrayEquals(ITERATION_KEYS, iteration.getSolutionSetKeyFields());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
        InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
        InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
        MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();
        assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
        assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());
        if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
            WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
            assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
        } else {
            assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
        }
        assertEquals(BEFORE_NEXT_WORKSET_MAP, nextWorksetMapper.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase), DataSet (org.apache.flink.api.java.DataSet), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase), Iterator (java.util.Iterator), DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase), DeltaIteration (org.apache.flink.api.java.operators.DeltaIteration), InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase), Plan (org.apache.flink.api.common.Plan), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Test (org.junit.Test)
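
The validation half of this test works by walking the translated Plan backwards from its sinks. A minimal sketch of that introspection pattern, hedged on the common-API types the test already imports plus org.apache.flink.api.common.operators.Operator, and assuming an env on which a program has already been defined:

Plan plan = env.createProgramPlan("translation sketch");

// each sink exposes its input operator, so the translated operator DAG can be walked back
for (GenericDataSinkBase<?> sink : plan.getDataSinks()) {
    Operator<?> input = sink.getInput();
    System.out.println(sink.getName() + " <- " + input.getName());
}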

Example 14 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

From the class DataExchangeModeOpenBranchingTest, method verifyBranchigPlan:

private void verifyBranchigPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toFilterSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toJoinSink, DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);
        DataSet<Tuple2<Long, Long>> data = env.generateSequence(1, 100000).map(new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });
        // output 1
        data.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink1");
        // output 2 does a join before the sink
        data.join(env.fromElements(new Tuple2<Long, Long>(1L, 2L))).where(1).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>()).name("sink2");
        // output 3 is direct
        data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode filterSink = findSink(optPlan.getDataSinks(), "sink1");
        SinkPlanNode joinSink = findSink(optPlan.getDataSinks(), "sink2");
        SinkPlanNode directSink = findSink(optPlan.getDataSinks(), "sink3");
        SingleInputPlanNode filterNode = (SingleInputPlanNode) filterSink.getPredecessor();
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        DualInputPlanNode joinNode = (DualInputPlanNode) joinSink.getPredecessor();
        assertEquals(mapNode, joinNode.getInput1().getSource());
        assertEquals(mapNode, directSink.getPredecessor());
        assertEquals(toFilterSink, filterSink.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, joinSink.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, directSink.getInput().getDataExchangeMode());
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)
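
The only knob this test sweeps is the execution mode on the ExecutionConfig; the optimizer derives each channel's DataExchangeMode from it. A short sketch, assuming org.apache.flink.api.common.ExecutionMode is imported:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// PIPELINED (the default), PIPELINED_FORCED, BATCH, and BATCH_FORCED steer
// whether channels stream records through or fully materialize them
env.getConfig().setExecutionMode(ExecutionMode.BATCH);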

Example 15 with DiscardingOutputFormat

Use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

From the class ReduceCompilationTest, method testAllReduceWithCombiner:

@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
        data.reduce(new RichReduceFunction<Long>() {

            @Override
            public Long reduce(Long value1, Long value2) {
                return value1 + value2;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Long>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), RichReduceFunction (org.apache.flink.api.common.functions.RichReduceFunction), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), Test (org.junit.Test)
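
The assertions pin down the two-stage shape of a non-grouped reduce: a combiner running at the source parallelism (8) feeding a single final ALL_REDUCE task (parallelism 1). A compact, hedged sketch of such a program, with a lambda standing in for the RichReduceFunction:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
env.generateSequence(1, 8000000)
        // non-grouped reduce: pre-combined in parallel, finished by one task
        .reduce((a, b) -> a + b)
        .output(new DiscardingOutputFormat<Long>());
env.execute("all-reduce sketch");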

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 39 usages
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 39 usages
Test (org.junit.Test): 35 usages
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 33 usages
Plan (org.apache.flink.api.common.Plan): 28 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 28 usages
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 27 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 27 usages
FieldList (org.apache.flink.api.common.operators.util.FieldList): 21 usages
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 17 usages
DataSet (org.apache.flink.api.java.DataSet): 11 usages
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11 usages
Graph (org.apache.flink.graph.Graph): 10 usages
Channel (org.apache.flink.optimizer.plan.Channel): 8 usages
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 7 usages
NullValue (org.apache.flink.types.NullValue): 7 usages
MapFunction (org.apache.flink.api.common.functions.MapFunction): 6 usages
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 6 usages
LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 5 usages
Edge (org.apache.flink.graph.Edge): 5 usages