Search in sources :

Example 1 with HASH_CODE

Use of com.hazelcast.jet.core.Partitioner.HASH_CODE in the project hazelcast-jet-reference-manual by hazelcast.

The method createDag of the class TfIdfCoreApi.

/**
 * Builds the Core API DAG used by this example.
 *
 * <p>Vertices created, in order: {@code stopword-source}, {@code doc-source},
 * {@code doc-count}, {@code doc-lines}, {@code tokenize}, {@code tf},
 * {@code tf-idf} and {@code sink}. The first four are limited to a local
 * parallelism of 1.
 *
 * <p>Edges created, in order:
 * <ol>
 *   <li>{@code stopword-source -> tokenize}: broadcast, priority -1</li>
 *   <li>{@code doc-lines -> tokenize} (destination ordinal 1)</li>
 *   <li>{@code doc-source -> doc-count}: distributed, broadcast</li>
 *   <li>{@code doc-source} (source ordinal 1) {@code -> doc-lines}</li>
 *   <li>{@code tokenize -> tf}: partitioned by the whole item using
 *       {@code HASH_CODE}</li>
 *   <li>{@code doc-count -> tf-idf}: broadcast, priority -1</li>
 *   <li>{@code tf -> tf-idf} (destination ordinal 1): distributed, partitioned
 *       by {@code item.getKey().getValue()} using {@code HASH_CODE}</li>
 *   <li>{@code tf-idf -> sink}</li>
 * </ol>
 *
 * <p>The {@code // tag::sN[]} / {@code // end::sN[]} comments are kept where
 * they were; they look like documentation include markers, so their placement
 * is preserved rather than tidied up (NOTE(review): confirm against the doc
 * build, including whether {@code s8} is expected to have an end marker).
 *
 * @return the fully wired DAG
 */
private static DAG createDag() {
    // Key extractor for the tf -> tf-idf edge: the String value of the item's
    // key entry (presumably the word, going by the variable name).
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord = item -> item.getKey().getValue();
    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
    dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
    dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
    dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) ->
                    traverseStream(docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
    dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
    dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
    dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
    dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]
    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);
    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag
            .edge(between(docSource, docCount).distributed().broadcast())
            .edge(from(docSource, 1).to(docLines))
            .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
            .edge(between(docCount, tfidf).broadcast().priority(-1))
            .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
            .edge(between(tfidf, sink));
}
Also used : AbstractProcessor(com.hazelcast.jet.core.AbstractProcessor) AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) Traverser(com.hazelcast.jet.Traverser) Arrays(java.util.Arrays) URISyntaxException(java.net.URISyntaxException) Processors(com.hazelcast.jet.core.processor.Processors) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) HashMap(java.util.HashMap) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) DistributedBiFunction(com.hazelcast.jet.function.DistributedBiFunction) DistributedFunctions.wholeItem(com.hazelcast.jet.function.DistributedFunctions.wholeItem) ArrayList(java.util.ArrayList) Collections.singletonList(java.util.Collections.singletonList) Traversers.lazy(com.hazelcast.jet.Traversers.lazy) Traversers.traverseIterable(com.hazelcast.jet.Traversers.traverseIterable) Util.entry(com.hazelcast.jet.Util.entry) Map(java.util.Map) Processors.nonCooperativeP(com.hazelcast.jet.core.processor.Processors.nonCooperativeP) Edge.from(com.hazelcast.jet.core.Edge.from) DAG(com.hazelcast.jet.core.DAG) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Processors.flatMapP(com.hazelcast.jet.core.processor.Processors.flatMapP) Nonnull(javax.annotation.Nonnull) Collectors.toSet(java.util.stream.Collectors.toSet) Files(java.nio.file.Files) Set(java.util.Set) IOException(java.io.IOException) Vertex(com.hazelcast.jet.core.Vertex) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) Processors.aggregateByKeyP(com.hazelcast.jet.core.processor.Processors.aggregateByKeyP) SinkProcessors(com.hazelcast.jet.core.processor.SinkProcessors) Entry(java.util.Map.Entry) HASH_CODE(com.hazelcast.jet.core.Partitioner.HASH_CODE) Pattern(java.util.regex.Pattern) Util(com.hazelcast.jet.Util) Edge.between(com.hazelcast.jet.core.Edge.between) Vertex(com.hazelcast.jet.core.Vertex) 
Entry(java.util.Map.Entry) DAG(com.hazelcast.jet.core.DAG)

Aggregations

Traverser (com.hazelcast.jet.Traverser)1 Traversers.lazy (com.hazelcast.jet.Traversers.lazy)1 Traversers.traverseIterable (com.hazelcast.jet.Traversers.traverseIterable)1 Traversers.traverseStream (com.hazelcast.jet.Traversers.traverseStream)1 Util (com.hazelcast.jet.Util)1 Util.entry (com.hazelcast.jet.Util.entry)1 AggregateOperations.counting (com.hazelcast.jet.aggregate.AggregateOperations.counting)1 AbstractProcessor (com.hazelcast.jet.core.AbstractProcessor)1 DAG (com.hazelcast.jet.core.DAG)1 Edge.between (com.hazelcast.jet.core.Edge.between)1 Edge.from (com.hazelcast.jet.core.Edge.from)1 HASH_CODE (com.hazelcast.jet.core.Partitioner.HASH_CODE)1 Vertex (com.hazelcast.jet.core.Vertex)1 Processors (com.hazelcast.jet.core.processor.Processors)1 Processors.aggregateByKeyP (com.hazelcast.jet.core.processor.Processors.aggregateByKeyP)1 Processors.flatMapP (com.hazelcast.jet.core.processor.Processors.flatMapP)1 Processors.nonCooperativeP (com.hazelcast.jet.core.processor.Processors.nonCooperativeP)1 SinkProcessors (com.hazelcast.jet.core.processor.SinkProcessors)1 SourceProcessors.readMapP (com.hazelcast.jet.core.processor.SourceProcessors.readMapP)1 DistributedBiFunction (com.hazelcast.jet.function.DistributedBiFunction)1