Search in sources :

Example 51 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class PageRankCompilerTest method testPageRank.

/**
 * Builds a minimal PageRank bulk-iteration program, compiles it with the optimizer,
 * and checks that the partitioning is pushed out of the loop: the input channel of the
 * bulk iteration must be hash-partitioned with no local strategy, and the first outgoing
 * channel of the partial solution must use a plain forward ship strategy.
 *
 * @throws Exception if building or compiling the plan fails; the exception is propagated
 *         unchanged so the test runner reports it with its full stack trace
 */
@Test
public void testPageRank() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Long> pagesInput = env.fromElements(1L);
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

    // assign initial rank to pages
    DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.map(new RankAssigner((1.0d / 10)));

    // build adjacency list from link input
    DataSet<Tuple2<Long, Long[]>> adjacencyListInput = linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

    // set iterative data set
    IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);

    // pass the local-strategy hint (hash, build side = second input) to the join
    Configuration cfg = new Configuration();
    cfg.setString(Optimizer.HINT_LOCAL_STRATEGY, Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);

    DataSet<Tuple2<Long, Double>> newRanks = iteration
            .join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
            .flatMap(new JoinVertexWithEdgesMatch())
            .groupBy(0).aggregate(SUM, 1)
            .map(new Dampener(0.85, 10));

    // close the loop; the second argument is the termination-criterion data set
    DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
            newRanks,
            newRanks.join(iteration).where(0).equalTo(0).filter(new EpsilonFilter()));
    finalPageRanks.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

    // get the plan and compile it
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);

    SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
    BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();

    // check that the partitioning is pushed out of the first loop
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iterPlanNode.getInput().getShipStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iterPlanNode.getInput().getLocalStrategy());

    BulkPartialSolutionPlanNode partSolPlanNode = iterPlanNode.getPartialSolutionPlanNode();
    Assert.assertEquals(ShipStrategyType.FORWARD, partSolPlanNode.getOutgoingChannels().get(0).getShipStrategy());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) EpsilonFilter(org.apache.flink.examples.java.graph.PageRank.EpsilonFilter) Configuration(org.apache.flink.configuration.Configuration) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) BuildOutgoingEdgeList(org.apache.flink.examples.java.graph.PageRank.BuildOutgoingEdgeList) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) RankAssigner(org.apache.flink.examples.java.graph.PageRank.RankAssigner) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Dampener(org.apache.flink.examples.java.graph.PageRank.Dampener) JoinVertexWithEdgesMatch(org.apache.flink.examples.java.graph.PageRank.JoinVertexWithEdgesMatch) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)

Example 52 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class ReduceITCase method testAllReduceForTuple.

@Test
public void testAllReduceForTuple() throws Exception {
    // All-reduce for tuple: the reduce is applied to the data set without any grouping.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

    List<Tuple3<Integer, Long, String>> reduced =
            input.reduce(new AllAddingTuple3Reduce()).collect();

    compareResultAsTuples(reduced, "231,91,Hello World\n");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 53 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class ReduceITCase method testReduceOnTuplesWithKeyFieldSelector.

@Test
public void testReduceOnTuplesWithKeyFieldSelector() throws Exception {
    // Reduce on tuples with key field selector: group on tuple position 1, then reduce.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

    List<Tuple3<Integer, Long, String>> reduced =
            input.groupBy(1).reduce(new Tuple3Reduce("B-)")).collect();

    // one expected tuple per line
    String expected = "1,1,Hi\n"
            + "5,2,B-)\n"
            + "15,3,B-)\n"
            + "34,4,B-)\n"
            + "65,5,B-)\n"
            + "111,6,B-)\n";
    compareResultAsTuples(reduced, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 54 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class ReduceITCase method testSupportForDataAndEnumSerialization.

@Test
public void testSupportForDataAndEnumSerialization() throws Exception {
    // Test support for Date and enum serialization
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // POJOs mapped from the generated sequence 0..2 ...
    DataSet<PojoWithDateAndEnum> generated = env.generateSequence(0, 2).map(new Mapper1());
    // ... unioned with the POJOs supplied by CollectionDataSets
    DataSet<PojoWithDateAndEnum> combined =
            generated.union(CollectionDataSets.getPojoWithDateAndEnum(env));

    // group on the "group" field and run the group reducer over each group
    List<String> verdicts =
            combined.groupBy("group").reduceGroup(new GroupReducer1()).collect();

    compareResultAsText(verdicts, "ok\nok");
}
Also used : PojoWithDateAndEnum(org.apache.flink.test.javaApiOperators.util.CollectionDataSets.PojoWithDateAndEnum) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Test(org.junit.Test)

Example 55 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

the class ReduceWithCombinerITCase method testReduceOnKeyedDataset.

@Test
public void testReduceOnKeyedDataset() throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    // creates the input data and distributes them evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
    List<Tuple3<String, Integer, Boolean>> actual = input.groupBy(0).reduceGroup(new KeyedCombReducer()).collect();
    String expected = "k1,6,true\nk2,4,true\n";
    compareResultAsTuples(actual, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1247 Test (org.junit.Test)1090 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)374 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)264 Plan (org.apache.flink.api.common.Plan)238 Tuple5 (org.apache.flink.api.java.tuple.Tuple5)236 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)199 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)139 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)138 Vertex (org.apache.flink.graph.Vertex)93 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)73 Edge (org.apache.flink.graph.Edge)70 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)66 ArrayList (java.util.ArrayList)57 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)49 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)44 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)39 BatchTableEnvironment (org.apache.flink.table.api.java.BatchTableEnvironment)38 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)37 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)35