Search in sources :

Example 1 with SumScore

use of org.apache.flink.graph.library.linkanalysis.Functions.SumScore in project flink by apache.

In the class HITS, method runInternal.

/**
 * Assembles the iterative dataflow that produces a hub and an authority score per vertex ID.
 *
 * <p>Each iteration derives per-ID hubbiness and authority sums from the edge ID pairs, computes
 * the sum of squares of each score set, and joins the two score sets with those sums broadcast
 * for normalization. When {@code convergenceThreshold} is below {@link Double#MAX_VALUE}, the
 * iteration input is additionally joined with the new scores through {@code ChangeInScores} and
 * an aggregation convergence criterion is registered on the iteration; otherwise the normalized
 * scores are fed back directly and the loop is bounded by {@code maxIterations} alone.
 *
 * @param input the graph whose edges drive the computation
 * @return the closed iteration's output mapped through {@code TranslateResult}
 * @throws Exception declared by the overridden method; propagated from plan construction
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input) throws Exception {
    // Edge ID pairs, reused by both co-groups inside the loop.
    DataSet<Tuple2<K, K>> edgeIds = input.getEdges()
            .map(new ExtractEdgeIDs<>())
            .setParallelism(parallelism)
            .name("Extract edge IDs");

    // (ID, hub, authority) — starting scores, summed per ID.
    DataSet<Tuple3<K, DoubleValue, DoubleValue>> seedScores = edgeIds
            .map(new InitializeScores<>())
            .setParallelism(parallelism)
            .name("Initial scores")
            .groupBy(0)
            .reduce(new SumScores<>())
            .setCombineHint(CombineHint.HASH)
            .setParallelism(parallelism)
            .name("Sum");

    IterativeDataSet<Tuple3<K, DoubleValue, DoubleValue>> iteration = seedScores
            .iterate(maxIterations)
            .setParallelism(parallelism);

    // (ID, hubbiness) — iteration state matched against field 1 of the edge pairs.
    DataSet<Tuple2<K, DoubleValue>> hubScores = iteration
            .coGroup(edgeIds)
            .where(0)
            .equalTo(1)
            .with(new Hubbiness<>())
            .setParallelism(parallelism)
            .name("Hub")
            .groupBy(0)
            .reduce(new SumScore<>())
            .setCombineHint(CombineHint.HASH)
            .setParallelism(parallelism)
            .name("Sum");

    // Sum of squared hubbiness, broadcast later for normalization.
    DataSet<DoubleValue> hubSquaredSum = hubScores
            .map(new Square<>())
            .setParallelism(parallelism)
            .name("Square")
            .reduce(new Sum())
            .setParallelism(parallelism)
            .name("Sum");

    // (ID, new authority) — hubbiness matched against field 0 of the edge pairs.
    DataSet<Tuple2<K, DoubleValue>> authorityScores = hubScores
            .coGroup(edgeIds)
            .where(0)
            .equalTo(0)
            .with(new Authority<>())
            .setParallelism(parallelism)
            .name("Authority")
            .groupBy(0)
            .reduce(new SumScore<>())
            .setCombineHint(CombineHint.HASH)
            .setParallelism(parallelism)
            .name("Sum");

    // Sum of squared authority, broadcast later for normalization.
    DataSet<DoubleValue> authoritySquaredSum = authorityScores
            .map(new Square<>())
            .setParallelism(parallelism)
            .name("Square")
            .reduce(new Sum())
            .setParallelism(parallelism)
            .name("Sum");

    // (ID, normalized hubbiness, normalized authority)
    DataSet<Tuple3<K, DoubleValue, DoubleValue>> normalizedScores = hubScores
            .fullOuterJoin(authorityScores, JoinHint.REPARTITION_SORT_MERGE)
            .where(0)
            .equalTo(0)
            .with(new JoinAndNormalizeHubAndAuthority<>())
            .withBroadcastSet(hubSquaredSum, HUBBINESS_SUM_SQUARED)
            .withBroadcastSet(authoritySquaredSum, AUTHORITY_SUM_SQUARED)
            .setParallelism(parallelism)
            .name("Join scores");

    // A threshold of Double.MAX_VALUE (or above) means "no convergence check".
    boolean checkConvergence = convergenceThreshold < Double.MAX_VALUE;

    DataSet<Tuple3<K, DoubleValue, DoubleValue>> nextScores;
    if (!checkConvergence) {
        nextScores = normalizedScores;
    } else {
        // Route the new scores through ChangeInScores so the registered
        // aggregator/criterion pair can decide when to stop iterating.
        nextScores = iteration
                .fullOuterJoin(normalizedScores, JoinHint.REPARTITION_SORT_MERGE)
                .where(0)
                .equalTo(0)
                .with(new ChangeInScores<>())
                .setParallelism(parallelism)
                .name("Change in scores");

        iteration.registerAggregationConvergenceCriterion(
                CHANGE_IN_SCORES,
                new DoubleSumAggregator(),
                new ScoreConvergence(convergenceThreshold));
    }

    return iteration
            .closeWith(nextScores)
            .map(new TranslateResult<>())
            .setParallelism(parallelism)
            .name("Map result");
}
Also used : DoubleValue(org.apache.flink.types.DoubleValue) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DoubleSumAggregator(org.apache.flink.api.common.aggregators.DoubleSumAggregator) SumScore(org.apache.flink.graph.library.linkanalysis.Functions.SumScore)

Example 2 with SumScore

use of org.apache.flink.graph.library.linkanalysis.Functions.SumScore in project flink by apache.

In the class PageRank, method runInternal.

/**
 * Assembles the iterative dataflow that produces a PageRank score per vertex ID.
 *
 * <p>Vertex degrees and the vertex count are computed once up front. Each iteration co-groups
 * the current scores with the source-degree-annotated edges, sums the results per ID, unions
 * in the separately initialized source vertices, and applies {@code AdjustScores} with the
 * damping factor while broadcasting the total score and the vertex count. When
 * {@code convergenceThreshold} is below {@link Double#MAX_VALUE}, the iteration input is
 * additionally joined with the adjusted scores through {@code ChangeInScores} and an
 * aggregation convergence criterion is registered; otherwise the adjusted scores are fed back
 * directly and the loop is bounded by {@code maxIterations} alone.
 *
 * @param input the graph to rank
 * @return the closed iteration's output mapped through {@code TranslateResult}
 * @throws Exception declared by the overridden method; propagated from plan construction
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input) throws Exception {
    // Degrees per vertex; zero-degree vertices are included only if configured.
    DataSet<Vertex<K, Degrees>> degrees = input.run(
            new VertexDegrees<K, VV, EV>()
                    .setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
                    .setParallelism(parallelism));

    // Number of vertices, broadcast to the initialization and adjustment steps.
    DataSet<LongValue> numVertices = GraphUtils.count(degrees);

    // (s, t, d(s)) — each edge annotated with a value extracted from its source's degrees.
    DataSet<Edge<K, LongValue>> edgesWithSourceDegree = input
            .run(new EdgeSourceDegrees<K, VV, EV>().setParallelism(parallelism))
            .map(new ExtractSourceDegree<>())
            .setParallelism(parallelism)
            .name("Extract source degree");

    // Vertices with zero in-edges; unioned back in on every iteration.
    DataSet<Tuple2<K, DoubleValue>> zeroInEdgeVertices = degrees
            .flatMap(new InitializeSourceVertices<>())
            .setParallelism(parallelism)
            .name("Initialize source vertex scores");

    // (s, initial pagerank(s))
    DataSet<Tuple2<K, DoubleValue>> seedScores = degrees
            .map(new InitializeVertexScores<>())
            .withBroadcastSet(numVertices, VERTEX_COUNT)
            .setParallelism(parallelism)
            .name("Initialize scores");

    IterativeDataSet<Tuple2<K, DoubleValue>> iteration = seedScores
            .iterate(maxIterations)
            .setParallelism(parallelism);

    // (s, projected pagerank(s)) — scores sent along edges, then summed per ID.
    DataSet<Tuple2<K, DoubleValue>> projectedScores = iteration
            .coGroup(edgesWithSourceDegree)
            .where(0)
            .equalTo(0)
            .with(new SendScore<>())
            .setParallelism(parallelism)
            .name("Send score")
            .groupBy(0)
            .reduce(new SumScore<>())
            .setCombineHint(CombineHint.HASH)
            .setParallelism(parallelism)
            .name("Sum");

    // (ignored ID, total pagerank) — a single record reduced over all projected scores.
    DataSet<Tuple2<K, DoubleValue>> totalScore = projectedScores
            .reduce(new SumVertexScores<>())
            .setParallelism(parallelism)
            .name("Sum");

    // (s, adjusted pagerank(s))
    DataSet<Tuple2<K, DoubleValue>> adjusted = projectedScores
            .union(zeroInEdgeVertices)
            .name("Union with source vertices")
            .map(new AdjustScores<>(dampingFactor))
            .withBroadcastSet(totalScore, SUM_OF_SCORES)
            .withBroadcastSet(numVertices, VERTEX_COUNT)
            .setParallelism(parallelism)
            .name("Adjust scores");

    // A threshold of Double.MAX_VALUE (or above) means "no convergence check".
    boolean checkConvergence = convergenceThreshold < Double.MAX_VALUE;

    DataSet<Tuple2<K, DoubleValue>> nextScores;
    if (!checkConvergence) {
        nextScores = adjusted;
    } else {
        // Route the new scores through ChangeInScores so the registered
        // aggregator/criterion pair can decide when to stop iterating.
        nextScores = iteration
                .join(adjusted)
                .where(0)
                .equalTo(0)
                .with(new ChangeInScores<>())
                .setParallelism(parallelism)
                .name("Change in scores");

        iteration.registerAggregationConvergenceCriterion(
                CHANGE_IN_SCORES,
                new DoubleSumAggregator(),
                new ScoreConvergence(convergenceThreshold));
    }

    return iteration
            .closeWith(nextScores)
            .map(new TranslateResult<>())
            .setParallelism(parallelism)
            .name("Map result");
}
Also used : Vertex(org.apache.flink.graph.Vertex) DoubleSumAggregator(org.apache.flink.api.common.aggregators.DoubleSumAggregator) SumScore(org.apache.flink.graph.library.linkanalysis.Functions.SumScore) Tuple2(org.apache.flink.api.java.tuple.Tuple2) LongValue(org.apache.flink.types.LongValue) Edge(org.apache.flink.graph.Edge)

Aggregations

DoubleSumAggregator (org.apache.flink.api.common.aggregators.DoubleSumAggregator): 2, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2, SumScore (org.apache.flink.graph.library.linkanalysis.Functions.SumScore): 2, Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 1, Edge (org.apache.flink.graph.Edge): 1, Vertex (org.apache.flink.graph.Vertex): 1, DoubleValue (org.apache.flink.types.DoubleValue): 1, LongValue (org.apache.flink.types.LongValue): 1