
Example 1 with LongSumAggregator

Use of org.apache.flink.api.common.aggregators.LongSumAggregator in the Apache Flink project.

From the class DeltaIterationTranslationTest, method testCorrectTranslation().

@Test
public void testCorrectTranslation() {
    try {
        final String JOB_NAME = "Test JobName";
        final String ITERATION_NAME = "Test Name";
        final String BEFORE_NEXT_WORKSET_MAP = "Some Mapper";
        final String AGGREGATOR_NAME = "AggregatorName";
        final int[] ITERATION_KEYS = new int[] { 2 };
        final int NUM_ITERATIONS = 13;
        final int DEFAULT_parallelism = 133;
        final int ITERATION_parallelism = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // ------------ construct the test program ------------------
        {
            env.setParallelism(DEFAULT_parallelism);
            @SuppressWarnings("unchecked") DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
            @SuppressWarnings("unchecked") DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
            DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
            iteration.name(ITERATION_NAME).parallelism(ITERATION_parallelism);
            iteration.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            // test that multiple workset consumers are supported
            DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset().map(new IdentityMapper<Tuple2<Double, String>>()).join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1);
            DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());
            DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(joined, joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));
            result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
            result.writeAsText("/dev/null");
        }
        Plan p = env.createProgramPlan(JOB_NAME);
        // ------------- validate the plan ----------------
        assertEquals(JOB_NAME, p.getJobName());
        assertEquals(DEFAULT_parallelism, p.getDefaultParallelism());
        // validate the iteration
        GenericDataSinkBase<?> sink1, sink2;
        {
            Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
            sink1 = sinks.next();
            sink2 = sinks.next();
        }
        DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();
        // check that multi consumer translation works for iterations
        assertEquals(iteration, sink2.getInput());
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, iteration.getMaximumNumberOfIterations());
        assertArrayEquals(ITERATION_KEYS, iteration.getSolutionSetKeyFields());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
        InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
        InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
        MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();
        assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
        assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());
        if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
            WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
            assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
        } else {
            assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
        }
        assertEquals(BEFORE_NEXT_WORKSET_MAP, nextWorksetMapper.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase), DataSet (org.apache.flink.api.java.DataSet), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase), Iterator (java.util.Iterator), DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase), DeltaIteration (org.apache.flink.api.java.operators.DeltaIteration), InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase), Plan (org.apache.flink.api.common.Plan), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Test (org.junit.Test)
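
The test above only verifies that the LongSumAggregator registered on the DeltaIteration survives translation into the common-API Plan; nothing in it reads the aggregator at runtime. As a complement, here is a minimal, hypothetical sketch of how a rich function running inside such a delta iteration could update the registered aggregator through its IterationRuntimeContext. The class name AggregatingMapper is made up for illustration; only the aggregator name "AggregatorName" comes from the test.

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.types.LongValue;

// Hypothetical sketch: a mapper used inside the delta iteration that adds one to the
// aggregator registered under "AggregatorName" for every record it sees.
public class AggregatingMapper<T> extends RichMapFunction<T, T> {

    private LongSumAggregator aggregator;

    @Override
    public void open(Configuration parameters) {
        // Rich functions inside an iteration see the registered aggregators
        // through the IterationRuntimeContext.
        aggregator = getIterationRuntimeContext().getIterationAggregator("AggregatorName");

        // The aggregate computed in the previous superstep is also available.
        LongValue previous =
                getIterationRuntimeContext().getPreviousIterationAggregate("AggregatorName");
        if (previous != null) {
            System.out.println("superstep " + getIterationRuntimeContext().getSuperstepNumber()
                    + ", previous aggregate = " + previous.getValue());
        }
    }

    @Override
    public T map(T value) {
        aggregator.aggregate(1L);
        return value;
    }
}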

Example 2 with LongSumAggregator

Use of org.apache.flink.api.common.aggregators.LongSumAggregator in the Apache Flink project.

From the class PregelTranslationTest, method testTranslationPlainEdges().

@Test
public void testTranslationPlainEdges() {
    try {
        final String ITERATION_NAME = "Test Name";
        final String AGGREGATOR_NAME = "AggregatorName";
        final String BC_SET_NAME = "borat messages";
        final int NUM_ITERATIONS = 13;
        final int ITERATION_parallelism = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> bcVar = env.fromElements(1L);
        DataSet<Vertex<String, Double>> result;
        // ------------ construct the test program ------------------
        {
            DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));
            DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));
            Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices, edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

                public Tuple3<String, String, NullValue> map(Tuple2<String, String> edge) {
                    return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            }), env);
            VertexCentricConfiguration parameters = new VertexCentricConfiguration();
            parameters.addBroadcastSet(BC_SET_NAME, bcVar);
            parameters.setName(ITERATION_NAME);
            parameters.setParallelism(ITERATION_parallelism);
            parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            result = graph.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<String, Double>>());
        }
        // ------------- validate the java program ----------------
        assertTrue(result instanceof DeltaIterationResultSet);
        DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
        DeltaIteration<?, ?> iteration = resultSet.getIterationHead();
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
        assertArrayEquals(new int[] { 0 }, resultSet.getKeyPositions());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
        TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup = (TwoInputUdfOperator<?, ?, ?, ?>) ((SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset()).getInput();
        // validate that the broadcast sets are forwarded
        assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Vertex (org.apache.flink.graph.Vertex), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), DataSet (org.apache.flink.api.java.DataSet), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), DeltaIterationResultSet (org.apache.flink.api.java.operators.DeltaIterationResultSet), TwoInputUdfOperator (org.apache.flink.api.java.operators.TwoInputUdfOperator), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), Graph (org.apache.flink.graph.Graph), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Test (org.junit.Test)
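
The assertions above stop at the translation layer: they check that the broadcast set and the aggregator reach the co-group implementing the vertex-centric iteration, but the MyCompute function itself is not shown. The sketch below is a hypothetical compute function with Double messages for the Graph<String, Double, NullValue> built in the test; it assumes Gelly's ComputeFunction exposes getBroadcastSet the same way the scatter-gather functions do, so treat that accessor as an assumption rather than verified API.

import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.pregel.ComputeFunction;
import org.apache.flink.graph.pregel.MessageIterator;
import org.apache.flink.types.NullValue;

// Hypothetical compute function: propagate the minimum vertex value, Pregel style.
public class MyComputeSketch extends ComputeFunction<String, Double, NullValue, Double> {

    @Override
    public void compute(Vertex<String, Double> vertex, MessageIterator<Double> messages) {
        // Broadcast set registered as "borat messages" in the VertexCentricConfiguration
        // (assumption: ComputeFunction offers getBroadcastSet like ScatterFunction does).
        Iterable<Long> broadcast = getBroadcastSet("borat messages");

        double min = vertex.getValue();
        for (double msg : messages) {
            min = Math.min(min, msg);
        }
        if (min < vertex.getValue()) {
            setNewVertexValue(min);
            sendMessageToAllNeighbors(min);
        }
    }
}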

Example 3 with LongSumAggregator

Use of org.apache.flink.api.common.aggregators.LongSumAggregator in the Apache Flink project.

From the class SpargelTranslationTest, method testTranslationPlainEdgesWithForkedBroadcastVariable().

@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
    try {
        final String ITERATION_NAME = "Test Name";
        final String AGGREGATOR_NAME = "AggregatorName";
        final String BC_SET_MESSAGES_NAME = "borat messages";
        final String BC_SET_UPDATES_NAME = "borat updates";
        final int NUM_ITERATIONS = 13;
        final int ITERATION_parallelism = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> bcVar = env.fromElements(1L);
        DataSet<Vertex<String, Double>> result;
        // ------------ construct the test program ------------------
        {
            DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));
            DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));
            Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices, edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

                public Tuple3<String, String, NullValue> map(Tuple2<String, String> edge) {
                    return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            }), env);
            ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
            parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcVar);
            parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcVar);
            parameters.setName(ITERATION_NAME);
            parameters.setParallelism(ITERATION_parallelism);
            parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<String, Double>>());
        }
        // ------------- validate the java program ----------------
        assertTrue(result instanceof DeltaIterationResultSet);
        DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
        DeltaIteration<?, ?> iteration = resultSet.getIterationHead();
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
        assertArrayEquals(new int[] { 0 }, resultSet.getKeyPositions());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
        // validate that the semantic properties are set as they should
        TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
        assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
        assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));
        TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();
        // validate that the broadcast sets are forwarded
        assertEquals(bcVar, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
        assertEquals(bcVar, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Vertex (org.apache.flink.graph.Vertex), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), DataSet (org.apache.flink.api.java.DataSet), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), DeltaIterationResultSet (org.apache.flink.api.java.operators.DeltaIterationResultSet), TwoInputUdfOperator (org.apache.flink.api.java.operators.TwoInputUdfOperator), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), Graph (org.apache.flink.graph.Graph), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Test (org.junit.Test)
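
The interesting part of this test is the forked broadcast variable: the same bcVar DataSet is registered once for the scatter (messaging) side and once for the gather (update) side, and the plan validation confirms that each copy arrives at the right operator. A hypothetical scatter function matching this setup could access its broadcast set and the registered aggregator roughly as sketched below; the accessor names follow the Gelly scatter-gather API as I recall it, so verify them against your Flink version.

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.spargel.ScatterFunction;
import org.apache.flink.types.NullValue;

// Hypothetical messaging function for Graph<String, Double, NullValue> with Double messages.
public class MessageFunctionSketch extends ScatterFunction<String, Double, Double, NullValue> {

    @Override
    public void sendMessages(Vertex<String, Double> vertex) {
        // Broadcast set forked for the scatter side under "borat messages".
        Iterable<Long> broadcast = getBroadcastSet("borat messages");

        // Per-superstep aggregator registered as "AggregatorName"
        // (assumption: ScatterFunction exposes getIterationAggregator).
        LongSumAggregator aggregator = getIterationAggregator("AggregatorName");
        aggregator.aggregate(1L);

        // Send the current vertex value to all neighbors.
        sendMessageToAllNeighbors(vertex.getValue());
    }
}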

Example 4 with LongSumAggregator

Use of org.apache.flink.api.common.aggregators.LongSumAggregator in the Apache Flink project.

From the class GSATranslationTest, method testTranslation().

@Test
public void testTranslation() {
    try {
        final String ITERATION_NAME = "Test Name";
        final String AGGREGATOR_NAME = "AggregatorName";
        final String BC_SET_GATHER_NAME = "gather messages";
        final String BC_SET_SUM_NAME = "sum updates";
        final String BC_SET_APLLY_NAME = "apply updates";
        final int NUM_ITERATIONS = 13;
        final int ITERATION_parallelism = 77;
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> bcGather = env.fromElements(1L);
        DataSet<Long> bcSum = env.fromElements(1L);
        DataSet<Long> bcApply = env.fromElements(1L);
        DataSet<Vertex<Long, Long>> result;
        // ------------ construct the test program ------------------
        {
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance())).map(new Tuple3ToEdgeMap<Long, NullValue>());
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);
            GSAConfiguration parameters = new GSAConfiguration();
            parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
            parameters.setName(ITERATION_NAME);
            parameters.setParallelism(ITERATION_parallelism);
            parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
            parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
            parameters.addBroadcastSetForApplyFunction(BC_SET_APLLY_NAME, bcApply);
            result = graph.runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), NUM_ITERATIONS, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        // ------------- validate the java program ----------------
        assertTrue(result instanceof DeltaIterationResultSet);
        DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
        DeltaIteration<?, ?> iteration = resultSet.getIterationHead();
        // check the basic iteration properties
        assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
        assertArrayEquals(new int[] { 0 }, resultSet.getKeyPositions());
        assertEquals(ITERATION_parallelism, iteration.getParallelism());
        assertEquals(ITERATION_NAME, iteration.getName());
        assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
        // validate that the semantic properties are set as they should
        TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
        assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
        assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));
        SingleInputUdfOperator<?, ?, ?> sumReduce = (SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
        SingleInputUdfOperator<?, ?, ?> gatherMap = (SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();
        // validate that the broadcast sets are forwarded
        assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
        assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
        assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APLLY_NAME));
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Vertex (org.apache.flink.graph.Vertex), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), DataSet (org.apache.flink.api.java.DataSet), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), DeltaIterationResultSet (org.apache.flink.api.java.operators.DeltaIterationResultSet), TwoInputUdfOperator (org.apache.flink.api.java.operators.TwoInputUdfOperator), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), NullValue (org.apache.flink.types.NullValue), SingleInputUdfOperator (org.apache.flink.api.java.operators.SingleInputUdfOperator), Graph (org.apache.flink.graph.Graph), Tuple3ToEdgeMap (org.apache.flink.graph.utils.Tuple3ToEdgeMap), Test (org.junit.Test)
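
The gather, sum, and apply UDFs are only referenced by name in this test (GatherNeighborIds, SelectMinId, UpdateComponentId). For orientation, a minimal connected-components-style trio under the Gelly GSA API might look like the sketch below; these classes are illustrative stand-ins with assumed signatures, not the helpers used by the actual test.

import org.apache.flink.graph.gsa.ApplyFunction;
import org.apache.flink.graph.gsa.GatherFunction;
import org.apache.flink.graph.gsa.Neighbor;
import org.apache.flink.graph.gsa.SumFunction;
import org.apache.flink.types.NullValue;

// Hypothetical gather-sum-apply trio for Graph<Long, Long, NullValue>: collect neighbor
// component ids, keep the minimum, and update the vertex when a smaller id arrives.

class GatherNeighborIdsSketch extends GatherFunction<Long, NullValue, Long> {
    @Override
    public Long gather(Neighbor<Long, NullValue> neighbor) {
        return neighbor.getNeighborValue();
    }
}

class SelectMinIdSketch extends SumFunction<Long, NullValue, Long> {
    @Override
    public Long sum(Long newValue, Long currentValue) {
        return Math.min(newValue, currentValue);
    }
}

class UpdateComponentIdSketch extends ApplyFunction<Long, Long, Long> {
    @Override
    public void apply(Long summedValue, Long originalValue) {
        if (summedValue < originalValue) {
            setResult(summedValue);
        }
    }
}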

Example 5 with LongSumAggregator

Use of org.apache.flink.api.common.aggregators.LongSumAggregator in the Apache Flink project.

From the class ScatterGatherConfigurationITCase, method testRunWithConfiguration().

@Test
public void testRunWithConfiguration() throws Exception {
    /*
     * Test Graph's runScatterGatherIteration when configuration parameters are provided.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Graph<Long, Long, Long> graph = Graph.fromCollection(TestGraphUtils.getLongLongVertices(), TestGraphUtils.getLongLongEdges(), env).mapVertices(new AssignOneMapper());
    // create the configuration object
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
    parameters.addBroadcastSetForScatterFunction("messagingBcastSet", env.fromElements(4, 5, 6));
    parameters.addBroadcastSetForGatherFunction("updateBcastSet", env.fromElements(1, 2, 3));
    parameters.registerAggregator("superstepAggregator", new LongSumAggregator());
    parameters.setOptNumVertices(true);
    Graph<Long, Long, Long> res = graph.runScatterGatherIteration(new MessageFunction(), new UpdateFunction(), 10, parameters);
    DataSet<Vertex<Long, Long>> data = res.getVertices();
    List<Vertex<Long, Long>> result = data.collect();
    expectedResult = "1,11\n" + "2,11\n" + "3,11\n" + "4,11\n" + "5,11";
    compareResultAsTuples(result, expectedResult);
}
Also used: Vertex (org.apache.flink.graph.Vertex), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator), ScatterGatherConfiguration (org.apache.flink.graph.spargel.ScatterGatherConfiguration), Test (org.junit.Test)
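
Unlike the translation tests, this one actually executes the iteration, so the broadcast sets, the "superstepAggregator", and the vertex-count option are consumed at runtime by the scatter and gather UDFs. A hypothetical gather-side function that would produce the expected "x,11" values (initial value 1 plus one increment per superstep over 10 supersteps) might look roughly like this; the accessor names on GatherFunction are what I recall from the scatter-gather API and should be double-checked.

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.graph.Vertex;
import org.apache.flink.graph.spargel.GatherFunction;
import org.apache.flink.graph.spargel.MessageIterator;

// Hypothetical gather (update) function: each superstep it reads the "updateBcastSet"
// broadcast set, bumps the "superstepAggregator", and adds 1 to the vertex value.
public class UpdateFunctionSketch extends GatherFunction<Long, Long, Long> {

    @Override
    public void preSuperstep() {
        // Broadcast set registered via addBroadcastSetForGatherFunction("updateBcastSet", ...).
        Iterable<Integer> broadcast = getBroadcastSet("updateBcastSet");

        // Aggregator registered as "superstepAggregator"
        // (assumption: GatherFunction exposes getIterationAggregator).
        LongSumAggregator aggregator = getIterationAggregator("superstepAggregator");
        aggregator.aggregate(getSuperstepNumber());
    }

    @Override
    public void updateVertex(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        // setOptNumVertices(true) makes the total number of vertices available.
        long numVertices = getNumberOfVertices();
        setNewVertexValue(vertex.getValue() + 1);
    }
}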

Aggregations

LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 16 usages
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15 usages
Test (org.junit.Test): 15 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 10 usages
Vertex (org.apache.flink.graph.Vertex): 6 usages
DataSet (org.apache.flink.api.java.DataSet): 5 usages
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 5 usages
DeltaIterationResultSet (org.apache.flink.api.java.operators.DeltaIterationResultSet): 4 usages
TwoInputUdfOperator (org.apache.flink.api.java.operators.TwoInputUdfOperator): 4 usages
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 4 usages
Graph (org.apache.flink.graph.Graph): 4 usages
Plan (org.apache.flink.api.common.Plan): 2 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 2 usages
Method (java.lang.reflect.Method): 1 usage
Iterator (java.util.Iterator): 1 usage
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 1 usage
AggregatorRegistry (org.apache.flink.api.common.aggregators.AggregatorRegistry): 1 usage
AggregatorWithName (org.apache.flink.api.common.aggregators.AggregatorWithName): 1 usage
FilterFunction (org.apache.flink.api.common.functions.FilterFunction): 1 usage
MapFunction (org.apache.flink.api.common.functions.MapFunction): 1 usage