Use of com.hazelcast.jet.function.DistributedFunction in project hazelcast-jet by hazelcast.
The class GroupTransform, method addToDagTwoStage.
//  ---------          ---------
// | source0 |  ...   | sourceN |
//  ---------          ---------
//      |                  |
//    local              local
// partitioned        partitioned
//      v                  v
//     -----------------------
//    |   accumulateByKeyP    |
//     -----------------------
//                |
//           distributed
//           partitioned
//                v
//         ---------------
//        | combineByKeyP |
//         ---------------
private void addToDagTwoStage(Planner p) {
    List<DistributedFunction<?, ? extends K>> groupKeyFns = this.groupKeyFns;
    String namePrefix = p.uniqueVertexName(this.name(), "-step");
    // Stage 1: accumulate partial results on each member, partitioned locally by group key
    Vertex v1 = p.dag.newVertex(namePrefix + '1', accumulateByKeyP(groupKeyFns, aggrOp))
                     .localParallelism(localParallelism());
    // Stage 2: combine the partial accumulators into the final result
    PlannerVertex pv2 = p.addVertex(this, namePrefix + '2', localParallelism(),
            combineByKeyP(aggrOp, mapToOutputFn));
    p.addEdges(this, v1, (e, ord) -> e.partitioned(groupKeyFns.get(ord), HASH_CODE));
    p.dag.edge(between(v1, pv2.v).distributed().partitioned(entryKey()));
}
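
This two-stage planner code is what the Pipeline API's groupingKey/aggregate compiles down to. As a minimal sketch of the user-facing side (the map names "input" and "counts" are hypothetical; DistributedFunction works as a key extractor because it is a Serializable variant of java.util.function.Function):

import com.hazelcast.jet.aggregate.AggregateOperations;
import com.hazelcast.jet.function.DistributedFunction;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;
import java.util.Map.Entry;

public class WordCountSketch {
    public static Pipeline build() {
        // DistributedFunction is serializable, so a method reference is enough
        DistributedFunction<Entry<String, Long>, String> keyFn = Entry::getKey;
        Pipeline p = Pipeline.create();
        p.drawFrom(Sources.<String, Long>map("input"))
         .groupingKey(keyFn)                          // supplies the groupKeyFns above
         .aggregate(AggregateOperations.counting())   // supplies the aggrOp above
         .drainTo(Sinks.map("counts"));
        return p;
    }
}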
Use of com.hazelcast.jet.function.DistributedFunction in project hazelcast-jet-reference-manual by hazelcast.
The class TfIdfCoreApi, method createDag.
private static DAG createDag() {
    // extracts the word from an item keyed by a (docId, word) entry
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord =
            item -> item.getKey().getValue();
    // increments a running count, ignoring the item itself
    DistributedBiFunction<Long, Object, Long> counter = (count, x) -> count + 1;
    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
            dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
            dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
            dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) ->
                    traverseStream(docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
            dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
            dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
            dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
            dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]
    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);
    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag.edge(between(docSource, docCount).distributed().broadcast())
              .edge(from(docSource, 1).to(docLines))
              .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
              .edge(between(docCount, tfidf).broadcast().priority(-1))
              .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
              .edge(between(tfidf, sink));
}
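
For completeness, a minimal sketch of submitting this DAG from an embedded Jet member; the instance setup is an assumption, not part of the manual's excerpt:

// assumption: run the TF-IDF job on an embedded Jet instance
JetInstance jet = Jet.newJetInstance();
try {
    jet.newJob(createDag()).join();  // submit the DAG and block until it completes
} finally {
    Jet.shutdownAll();
}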