Search in sources:

Example 11 with LongSumAggregator

use of org.apache.flink.api.common.aggregators.LongSumAggregator in project flink by apache.

the class JobGraphGeneratorTest method testResourcesForDeltaIteration.

/**
 * Verifies that the resources are set onto each job vertex correctly when generating the job
 * graph for a program that contains a delta iteration.
 *
 * <p>The program is: source -> map -> delta iteration (workset -> map -> filter) -> sink. Each
 * operator gets its own {@link ResourceSpec}, and the test then checks the spec that ends up on
 * each generated {@link JobVertex}.
 *
 * @throws Exception if the reflective setup, the plan compilation or the job graph generation fails
 */
@Test
public void testResourcesForDeltaIteration() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);

    // The setResources methods are looked up and invoked reflectively
    // (presumably because they are not publicly accessible -- confirm against the API).
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method deltaMethod = DeltaIteration.class.getDeclaredMethod("setResources", ResourceSpec.class);
    deltaMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);

    // identity mapper
    MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> mapFunction = new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> map(Tuple2<Long, Long> value) throws Exception {
            return value;
        }
    };
    // filter that rejects every element
    FilterFunction<Tuple2<Long, Long>> filterFunction = new FilterFunction<Tuple2<Long, Long>>() {

        @Override
        public boolean filter(Tuple2<Long, Long> value) throws Exception {
            return false;
        }
    };

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<>(1L, 2L));
    opMethod.invoke(input, resource1);

    // CHAIN(Source -> Map): the first vertex is expected to carry resource1 merged with resource2
    DataSet<Tuple2<Long, Long>> map = input.map(mapFunction);
    opMethod.invoke(map, resource2);

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = map.iterateDelta(map, 100, 0).registerAggregator("test", new LongSumAggregator());
    deltaMethod.invoke(iteration, resource3);

    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset().map(mapFunction);
    opMethod.invoke(delta, resource4);

    DataSet<Tuple2<Long, Long>> feedback = delta.filter(filterFunction);
    opMethod.invoke(feedback, resource5);

    DataSink<Tuple2<Long, Long>> sink = iteration.closeWith(delta, feedback).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    sinkMethod.invoke(sink, resource6);

    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);

    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);

    // Fetch the topologically sorted vertices once instead of once per lookup.
    java.util.List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    JobVertex sourceMapVertex = vertices.get(0);
    JobVertex iterationHeadVertex = vertices.get(1);
    JobVertex deltaVertex = vertices.get(2);
    JobVertex iterationTailVertex = vertices.get(3);
    JobVertex feedbackVertex = vertices.get(4);
    JobVertex sinkVertex = vertices.get(5);
    JobVertex iterationSyncVertex = vertices.get(6);

    assertTrue(sourceMapVertex.getMinResources().equals(resource1.merge(resource2)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource3));
    assertTrue(deltaVertex.getMinResources().equals(resource4));
    // the iteration tail task will be scheduled in the same instance as the iteration head,
    // and currently has no resources set.
    assertTrue(iterationTailVertex.getPreferredResources().equals(ResourceSpec.DEFAULT));
    assertTrue(feedbackVertex.getMinResources().equals(resource5));
    assertTrue(sinkVertex.getPreferredResources().equals(resource6));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource3));
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) Optimizer(org.apache.flink.optimizer.Optimizer) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) Method(java.lang.reflect.Method) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 12 with LongSumAggregator

use of org.apache.flink.api.common.aggregators.LongSumAggregator in project flink by apache.

the class AggregatorConvergenceITCase method testConnectedComponentsWithParametrizableConvergence.

@Test
public void testConnectedComponentsWithParametrizableConvergence() throws Exception {
    // aggregator name used by the convergence check
    final String updatedElementsAggregatorName = "updated.elements.aggr";
    // iterating stops once fewer than this many elements changed their value
    final long minUpdatedElements = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> vertices = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edgeSet = env.fromCollection(edgesInput);

    IterativeDataSet<Tuple2<Long, Long>> loop = vertices.iterate(10);

    // hook the convergence criterion up to the aggregator
    loop.registerAggregationConvergenceCriterion(
            updatedElementsAggregatorName,
            new LongSumAggregator(),
            new UpdatedElementsConvergenceCriterion(minUpdatedElements));

    DataSet<Tuple2<Long, Long>> candidateComponents = loop
            .join(edgeSet)
            .where(0)
            .equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0)
            .min(1);

    DataSet<Tuple2<Long, Long>> nextComponents = candidateComponents
            .join(loop)
            .where(0)
            .equalTo(0)
            .flatMap(new MinimumIdFilter(updatedElementsAggregatorName));

    List<Tuple2<Long, Long>> actual = loop.closeWith(nextComponents).collect();

    // sort the collected tuples before comparing them with the expected list
    JavaProgramTestBase.TupleComparator<Tuple2<Long, Long>> tupleOrder =
            new JavaProgramTestBase.TupleComparator<Tuple2<Long, Long>>();
    Collections.sort(actual, tupleOrder);

    assertEquals(expectedResult, actual);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JavaProgramTestBase(org.apache.flink.test.util.JavaProgramTestBase) Test(org.junit.Test)

Example 13 with LongSumAggregator

use of org.apache.flink.api.common.aggregators.LongSumAggregator in project flink by apache.

the class AggregatorsITCase method testConvergenceCriterionWithParameterForIterate.

@Test
public void testConvergenceCriterionWithParameterForIterate() throws Exception {
    // Bulk iteration ("iterate") with a parameterized convergence criterion.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> numbers = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> loop = numbers.iterate(MAX_ITERATIONS);

    // one aggregator instance, registered by name ...
    LongSumAggregator negativeElementsSum = new LongSumAggregator();
    loop.registerAggregator(NEGATIVE_ELEMENTS_AGGR, negativeElementsSum);

    // ... and bound to the convergence criterion under the same name
    loop.registerAggregationConvergenceCriterion(
            NEGATIVE_ELEMENTS_AGGR,
            negativeElementsSum,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Integer> stepResult = loop.map(new SubtractOneMap());
    DataSet<Integer> loopResult = loop.closeWith(stepResult);
    loopResult.writeAsText(resultPath);

    env.execute();

    expected = "-3\n"
            + "-2\n" + "-2\n"
            + "-1\n" + "-1\n" + "-1\n"
            + "0\n" + "0\n" + "0\n" + "0\n"
            + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) Test(org.junit.Test)

Example 14 with LongSumAggregator

use of org.apache.flink.api.common.aggregators.LongSumAggregator in project flink by apache.

the class AggregatorsITCase method testConvergenceCriterionWithParameterForIterateDelta.

@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    // Delta iteration ("iterateDelta") with a parameterized convergence criterion.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> numbers = CollectionDataSets.getIntegerDataSet(env);
    DataSet<Tuple2<Integer, Integer>> tuples = numbers.map(new TupleMakerMap());

    // the same data set serves as initial solution set and as initial workset
    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> loop =
            tuples.iterateDelta(tuples, MAX_ITERATIONS, 0);

    // one aggregator instance, registered by name ...
    LongSumAggregator negativeElementsSum = new LongSumAggregator();
    loop.registerAggregator(NEGATIVE_ELEMENTS_AGGR, negativeElementsSum);

    // ... and bound to the convergence criterion under the same name
    loop.registerAggregationConvergenceCriterion(
            NEGATIVE_ELEMENTS_AGGR,
            negativeElementsSum,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Tuple2<Integer, Integer>> stepResult = loop.getWorkset().map(new AggregateAndSubtractOneDelta());
    DataSet<Tuple2<Integer, Integer>> delta = stepResult
            .join(loop.getSolutionSet())
            .where(0)
            .equalTo(0)
            .projectFirst(0, 1);

    // the joined elements are used both as solution set delta and as next workset
    DataSet<Tuple2<Integer, Integer>> loopResult = loop.closeWith(delta, delta);
    DataSet<Integer> secondFields = loopResult.map(new ProjectSecondMapper());
    secondFields.writeAsText(resultPath);

    env.execute();

    expected = "-3\n"
            + "-2\n" + "-2\n"
            + "-1\n" + "-1\n" + "-1\n"
            + "0\n" + "0\n" + "0\n" + "0\n"
            + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 15 with LongSumAggregator

use of org.apache.flink.api.common.aggregators.LongSumAggregator in project flink by apache.

the class AggregatorsITCase method testAggregatorWithoutParameterForIterateDelta.

@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    // Delta iteration ("iterateDelta") with an aggregator that takes no parameter.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> numbers = CollectionDataSets.getIntegerDataSet(env);
    DataSet<Tuple2<Integer, Integer>> tuples = numbers.map(new TupleMakerMap());

    // the same data set serves as initial solution set and as initial workset
    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> loop =
            tuples.iterateDelta(tuples, MAX_ITERATIONS, 0);

    // make the aggregator available under its name
    LongSumAggregator negativeElementsSum = new LongSumAggregator();
    loop.registerAggregator(NEGATIVE_ELEMENTS_AGGR, negativeElementsSum);

    DataSet<Tuple2<Integer, Integer>> stepResult = loop.getWorkset().map(new AggregateMapDelta());
    DataSet<Tuple2<Integer, Integer>> delta = stepResult
            .join(loop.getSolutionSet())
            .where(0)
            .equalTo(0)
            .flatMap(new UpdateFilter());

    // the joined elements are used both as solution set delta and as next workset
    DataSet<Tuple2<Integer, Integer>> loopResult = loop.closeWith(delta, delta);
    DataSet<Integer> secondFields = loopResult.map(new ProjectSecondMapper());
    secondFields.writeAsText(resultPath);

    env.execute();

    expected = "1\n"
            + "2\n" + "2\n"
            + "3\n" + "3\n" + "3\n"
            + "4\n" + "4\n" + "4\n" + "4\n"
            + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Aggregations

LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator)16 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)15 Test (org.junit.Test)15 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)10 Vertex (org.apache.flink.graph.Vertex)6 DataSet (org.apache.flink.api.java.DataSet)5 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)5 DeltaIterationResultSet (org.apache.flink.api.java.operators.DeltaIterationResultSet)4 TwoInputUdfOperator (org.apache.flink.api.java.operators.TwoInputUdfOperator)4 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)4 Graph (org.apache.flink.graph.Graph)4 Plan (org.apache.flink.api.common.Plan)2 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)2 Method (java.lang.reflect.Method)1 Iterator (java.util.Iterator)1 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 AggregatorRegistry (org.apache.flink.api.common.aggregators.AggregatorRegistry)1 AggregatorWithName (org.apache.flink.api.common.aggregators.AggregatorWithName)1 FilterFunction (org.apache.flink.api.common.functions.FilterFunction)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1