Search in sources :

Example 6 with DistributedFunction

use of com.hazelcast.jet.function.DistributedFunction in project hazelcast-jet by hazelcast.

From the class GroupTransform, method addToDagTwoStage.

//  ---------        ---------
// | source0 |  ... | sourceN |
//  ---------        ---------
//      |                |
//    local            local
// partitioned      partitioned
//      v                v
//     --------------------
//    |  accumulateByKeyP  |
//     --------------------
//               |
//          distributed
//          partitioned
//               v
//        ---------------
//       | combineByKeyP |
//        ---------------
/**
 * Adds this transform to the planner's DAG as two vertices: a first step
 * created from {@code accumulateByKeyP} and a second step created from
 * {@code combineByKeyP}.
 * <p>
 * Every edge into the first step is partitioned with the group-key function
 * that matches the edge's ordinal, using {@code HASH_CODE}. The edge from the
 * first step to the second is distributed and partitioned by entry key.
 *
 * @param p planner whose {@code dag} receives the new vertices and edges
 */
private void addToDagTwoStage(Planner p) {
    // Local copy: the lambda below reads this variable rather than the field.
    final List<DistributedFunction<?, ? extends K>> keyFns = this.groupKeyFns;
    final String stepName = p.uniqueVertexName(this.name(), "-step");

    // Step 1: accumulate by key.
    Vertex accumulateStep = p.dag
            .newVertex(stepName + '1', accumulateByKeyP(keyFns, aggrOp))
            .localParallelism(localParallelism());

    // Step 2: combine the accumulated results and map them to the output.
    PlannerVertex combineStep = p.addVertex(
            this, stepName + '2', localParallelism(), combineByKeyP(aggrOp, mapToOutputFn));

    p.addEdges(this, accumulateStep,
            (edge, ordinal) -> edge.partitioned(keyFns.get(ordinal), HASH_CODE));
    p.dag.edge(between(accumulateStep, combineStep.v).distributed().partitioned(entryKey()));
}
Also used : Vertex(com.hazelcast.jet.core.Vertex) PlannerVertex(com.hazelcast.jet.impl.pipeline.Planner.PlannerVertex) DistributedFunction(com.hazelcast.jet.function.DistributedFunction)

Example 7 with DistributedFunction

use of com.hazelcast.jet.function.DistributedFunction in project hazelcast-jet-reference-manual by hazelcast.

From the class TfIdfCoreApi, method createDag.

/**
 * Builds the DAG with eight vertices and the following edges:
 * <ul>
 *   <li>{@code stopword-source -> tokenize}: broadcast, priority -1</li>
 *   <li>{@code doc-lines -> tokenize}: into ordinal 1 of {@code tokenize}</li>
 *   <li>{@code doc-source -> doc-count}: distributed broadcast</li>
 *   <li>{@code doc-source -> doc-lines}: from ordinal 1 of {@code doc-source}</li>
 *   <li>{@code tokenize -> tf}: partitioned by the whole item with {@code HASH_CODE}</li>
 *   <li>{@code doc-count -> tf-idf}: broadcast, priority -1</li>
 *   <li>{@code tf -> tf-idf}: into ordinal 1, distributed, partitioned by {@code byWord}</li>
 *   <li>{@code tf-idf -> sink}</li>
 * </ul>
 * The two source vertices, {@code doc-count} and {@code doc-lines} are given a
 * local parallelism of 1.
 * <p>
 * The {@code tag::sN[]} / {@code end::sN[]} comments mark regions of this method;
 * they are presumably asciidoc include markers, so their positions relative to
 * the code are kept as they are.
 * NOTE(review): the {@code s8} region has an opening marker but no closing one
 * in this method - confirm where it is supposed to end.
 *
 * @return the fully wired DAG
 */
private static DAG createDag() {
    // Partitioning key for the tf -> tf-idf edge: the String value of the entry stored as the item's key.
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord = item -> item.getKey().getValue();

    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
    dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
    dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
    dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) ->
                    traverseStream(docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
    dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
    dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
    dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
    dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]

    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);

    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag
            .edge(between(docSource, docCount).distributed().broadcast())
            .edge(from(docSource, 1).to(docLines))
            .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
            .edge(between(docCount, tfidf).broadcast().priority(-1))
            .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
            .edge(between(tfidf, sink));
}
Also used : AbstractProcessor(com.hazelcast.jet.core.AbstractProcessor) AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) Traverser(com.hazelcast.jet.Traverser) Arrays(java.util.Arrays) URISyntaxException(java.net.URISyntaxException) Processors(com.hazelcast.jet.core.processor.Processors) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) HashMap(java.util.HashMap) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) DistributedBiFunction(com.hazelcast.jet.function.DistributedBiFunction) DistributedFunctions.wholeItem(com.hazelcast.jet.function.DistributedFunctions.wholeItem) ArrayList(java.util.ArrayList) Collections.singletonList(java.util.Collections.singletonList) Traversers.lazy(com.hazelcast.jet.Traversers.lazy) Traversers.traverseIterable(com.hazelcast.jet.Traversers.traverseIterable) Util.entry(com.hazelcast.jet.Util.entry) Map(java.util.Map) Processors.nonCooperativeP(com.hazelcast.jet.core.processor.Processors.nonCooperativeP) Edge.from(com.hazelcast.jet.core.Edge.from) DAG(com.hazelcast.jet.core.DAG) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Processors.flatMapP(com.hazelcast.jet.core.processor.Processors.flatMapP) Nonnull(javax.annotation.Nonnull) Collectors.toSet(java.util.stream.Collectors.toSet) Files(java.nio.file.Files) Set(java.util.Set) IOException(java.io.IOException) Vertex(com.hazelcast.jet.core.Vertex) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) Processors.aggregateByKeyP(com.hazelcast.jet.core.processor.Processors.aggregateByKeyP) SinkProcessors(com.hazelcast.jet.core.processor.SinkProcessors) Entry(java.util.Map.Entry) HASH_CODE(com.hazelcast.jet.core.Partitioner.HASH_CODE) Pattern(java.util.regex.Pattern) Util(com.hazelcast.jet.Util) Edge.between(com.hazelcast.jet.core.Edge.between) Vertex(com.hazelcast.jet.core.Vertex) 
Entry(java.util.Map.Entry) DAG(com.hazelcast.jet.core.DAG)

Aggregations

DistributedFunction (com.hazelcast.jet.function.DistributedFunction)7 List (java.util.List)5 Collectors.toList (java.util.stream.Collectors.toList)5 Entry (java.util.Map.Entry)4 Util.entry (com.hazelcast.jet.Util.entry)3 LongAccumulator (com.hazelcast.jet.accumulator.LongAccumulator)3 Processors.combineToSlidingWindowP (com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP)3 TimestampedEntry (com.hazelcast.jet.datamodel.TimestampedEntry)3 DistributedBiFunction (com.hazelcast.jet.function.DistributedBiFunction)3 ArrayList (java.util.ArrayList)3 Arrays (java.util.Arrays)3 Collections.singletonList (java.util.Collections.singletonList)3 Nonnull (javax.annotation.Nonnull)3 Before (org.junit.Before)3 Traverser (com.hazelcast.jet.Traverser)2 AggregateOperation (com.hazelcast.jet.aggregate.AggregateOperation)2 AggregateOperation1 (com.hazelcast.jet.aggregate.AggregateOperation1)2 AggregateOperations.counting (com.hazelcast.jet.aggregate.AggregateOperations.counting)2 Edge.between (com.hazelcast.jet.core.Edge.between)2 Processor (com.hazelcast.jet.core.Processor)2