Use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
Class PageRankTest, method pageRank.
/**
 * Performs a single score-propagation pass over {@code input}.
 *
 * <p>The pass has two stages. First, every record is fanned out: each entry of its
 * {@code urls} becomes a key paired with the record's {@code propagatedScore()}. Second, the
 * original table is cogrouped with those contributions; per key the contributions are summed
 * and {@code d + (1 - d) * sum} is handed to {@code PageRankData.next(...)} to build the
 * replacement value.
 *
 * @param input table of {@code PageRankData} keyed by string
 * @param d constant term of the update; the summed contributions are weighted by {@code 1 - d}
 * @return a table built with the same {@code PTableType} as {@code input}
 */
public static PTable<String, PageRankData> pageRank(PTable<String, PageRankData> input, final float d) {
    final PTypeFamily family = input.getTypeFamily();

    // Stage 1: hand each record's propagated score to every key listed in its urls.
    DoFn<Pair<String, PageRankData>, Pair<String, Float>> spreadScores =
            new DoFn<Pair<String, PageRankData>, Pair<String, Float>>() {
                @Override
                public void process(Pair<String, PageRankData> page, Emitter<Pair<String, Float>> out) {
                    PageRankData data = page.second();
                    for (String target : data.urls) {
                        out.emit(Pair.of(target, data.propagatedScore()));
                    }
                }
            };
    PTable<String, Float> contributions =
            input.parallelDo(spreadScores, family.tableOf(family.strings(), family.floats()));

    // Stage 2: bring each key's current record together with the scores sent to it.
    PTable<String, Pair<Collection<PageRankData>, Collection<Float>>> joined = input.cogroup(contributions);

    MapFn<Pair<String, Pair<Collection<PageRankData>, Collection<Float>>>, Pair<String, PageRankData>> applyUpdate =
            new MapFn<Pair<String, Pair<Collection<PageRankData>, Collection<Float>>>, Pair<String, PageRankData>>() {
                @Override
                public Pair<String, PageRankData> map(
                        Pair<String, Pair<Collection<PageRankData>, Collection<Float>>> entry) {
                    Pair<Collection<PageRankData>, Collection<Float>> grouped = entry.second();
                    // Exactly one record per key is required here; getOnlyElement throws for any
                    // other count (NOTE(review): presumably Guava's Iterables - confirm).
                    PageRankData current = Iterables.getOnlyElement(grouped.first());
                    float total = 0.0f;
                    for (Float contribution : grouped.second()) {
                        total += contribution;
                    }
                    float updated = d + (1.0f - d) * total;
                    return Pair.of(entry.first(), current.next(updated));
                }
            };
    return joined.parallelDo(applyUpdate, input.getPTableType());
}
Aggregations