Use of com.hazelcast.jet.core.Partitioner.HASH_CODE in project hazelcast-jet-reference-manual by hazelcast.
From the class TfIdfCoreApi, method createDag:
private static DAG createDag() {
    // Key extractor used to partition TF entries by word: the items are
    // Entry<Entry<docId, word>, count>, so the word is the key's value.
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord =
            item -> item.getKey().getValue();
    DistributedBiFunction<Long, Object, Long> counter = (count, x) -> count + 1;

    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
            dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
            dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
            dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    // Reading the book files does blocking I/O, hence nonCooperativeP.
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) ->
                    traverseStream(docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
            dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
            dag.newVertex("tf", aggregateByKeyP(
                    singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
            dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
            dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]

    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);

    // tag::s8[]
    // The stopword set must arrive at tokenize before the document lines,
    // hence the lower-priority broadcast edge.
    dag.edge(between(stopwordSource, tokenize)
            .broadcast()
            .priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    // end::s8[]
    return dag
            .edge(between(docSource, docCount).distributed().broadcast())
            .edge(from(docSource, 1).to(docLines))
            // HASH_CODE derives the partition from the key's hashCode(),
            // avoiding the default serialization-based partitioning. It is
            // safe on this edge because the edge is local.
            .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
            .edge(between(docCount, tfidf).broadcast().priority(-1))
            // Also safe on this distributed edge: String.hashCode() is
            // specified by the JDK, so it is identical on every member.
            .edge(from(tf).to(tfidf, 1)
                    .distributed()
                    .partitioned(byWord, HASH_CODE))
            .edge(between(tfidf, sink));
}
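For context, here is a minimal sketch of how this DAG might be submitted for execution, assuming a Jet 0.6-era API (Jet.newJetInstance(), JetInstance.newJob() and Job.join() exist there). Since createDag() is private, the sketch assumes this main method is added to TfIdfCoreApi itself; loading the DOCID_NAME map and reading the INVERTED_INDEX results are the sample's own setup and are omitted here.

import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.core.DAG;

public static void main(String[] args) {
    // Start an embedded Jet member; a second member could be started the
    // same way to exercise the distributed edges.
    JetInstance jet = Jet.newJetInstance();
    try {
        DAG dag = createDag();
        // Submit the DAG as a batch job and block until it completes.
        jet.newJob(dag).join();
    } finally {
        Jet.shutdownAll();
    }
}

On a real cluster one would connect with Jet.newJetClient() instead of starting an embedded member.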