Search in sources :

Example 1 with DistributedBiFunction

use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet by hazelcast.

The class HashJoinTransform, method addToDag.

//         ---------           ----------           ----------
//        | primary |         | joined-1 |         | joined-2 |
//         ---------           ----------           ----------
//             |                   |                     |
//             |              distributed          distributed
//             |               broadcast            broadcast
//             |                   v                     v
//             |             -------------         -------------
//             |            | collector-1 |       | collector-2 |
//             |             -------------         -------------
//             |                   |                     |
//             |                 local                 local
//           local             broadcast             broadcast
//          unicast           prioritized           prioritized
//         ordinal 0           ordinal 1             ordinal 2
//             \                   |                     |
//              ----------------\  |   /----------------/
//                              v  v  v
//                              --------
//                             | joiner |
//                              --------
/**
 * Adds the vertices and edges that implement this hash join to the planner's DAG.
 * <p>
 * One {@code "-joiner"} vertex running {@code HashJoinP} is created and connected, on its
 * ordinal 0, to the vertex of the first upstream transform. For every remaining upstream
 * transform (numbered from 1) a {@code "-collector<N>"} vertex running
 * {@code HashJoinCollectP} is created with local parallelism 1. It receives the upstream
 * vertex's output over a distributed broadcast edge and feeds the joiner's ordinal
 * {@code N} over a broadcast edge with priority -1.
 *
 * @param p the planner that supplies the DAG, unique vertex names and the
 *          transform-to-vertex mapping
 */
@Override
@SuppressWarnings("unchecked")
public void addToDag(Planner p) {
    String vertexNamePrefix = p.uniqueVertexName(this.name(), "");
    PlannerVertex primaryPv = p.xform2vertex.get(this.upstream().get(0));

    // The joiner's processor supplier refers only to these locals, not to fields of `this`.
    List leftKeyFns = this.clauses.stream().map(JoinClause::leftKeyFn).collect(toList());
    List<Tag> joinTags = this.tags;
    DistributedBiFunction outputBiFn = this.mapToOutputBiFn;
    DistributedTriFunction outputTriFn = this.mapToOutputTriFn;

    String joinerName = vertexNamePrefix + "-joiner";
    Vertex joinerVertex = p.addVertex(this, joinerName, localParallelism(),
            () -> new HashJoinP<>(leftKeyFns, joinTags, outputBiFn, outputTriFn)).v;
    // Primary input always lands on joiner ordinal 0.
    p.dag.edge(from(primaryPv.v, primaryPv.nextAvailableOrdinal()).to(joinerVertex, 0));

    String collectorNamePrefix = vertexNamePrefix + "-collector";
    int clauseIndex = 0;
    for (Transform joinedTransform : tailList(this.upstream())) {
        // Clause i belongs to the (i + 1)-th upstream and to joiner ordinal i + 1.
        int joinerOrdinal = clauseIndex + 1;
        PlannerVertex joinedPv = p.xform2vertex.get(joinedTransform);
        JoinClause<?, ?, ?, ?> joinClause = this.clauses.get(clauseIndex);
        DistributedFunction<Object, Object> rightKeyFn =
                (DistributedFunction<Object, Object>) joinClause.rightKeyFn();
        DistributedFunction<Object, Object> rightProjectFn =
                (DistributedFunction<Object, Object>) joinClause.rightProjectFn();

        Vertex collectorVertex = p.dag.newVertex(
                collectorNamePrefix + joinerOrdinal,
                () -> new HashJoinCollectP(rightKeyFn, rightProjectFn));
        collectorVertex.localParallelism(1);

        // joined upstream -> collector: distributed broadcast
        p.dag.edge(from(joinedPv.v, joinedPv.nextAvailableOrdinal())
                .to(collectorVertex, 0)
                .distributed()
                .broadcast());
        // collector -> joiner: broadcast with priority -1
        p.dag.edge(from(collectorVertex, 0)
                .to(joinerVertex, joinerOrdinal)
                .broadcast()
                .priority(-1));

        clauseIndex++;
    }
}
Also used : Vertex(com.hazelcast.jet.core.Vertex) PlannerVertex(com.hazelcast.jet.impl.pipeline.Planner.PlannerVertex) HashJoinCollectP(com.hazelcast.jet.impl.processor.HashJoinCollectP) HashJoinP(com.hazelcast.jet.impl.processor.HashJoinP) PlannerVertex(com.hazelcast.jet.impl.pipeline.Planner.PlannerVertex) DistributedBiFunction(com.hazelcast.jet.function.DistributedBiFunction) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Planner.tailList(com.hazelcast.jet.impl.pipeline.Planner.tailList) Tag(com.hazelcast.jet.datamodel.Tag) DistributedTriFunction(com.hazelcast.jet.function.DistributedTriFunction) DistributedFunction(com.hazelcast.jet.function.DistributedFunction)

Example 2 with DistributedBiFunction

use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet by hazelcast.

The class GrAggBuilder, method buildBatch.

/**
 * Creates the batch stage for this grouped aggregation.
 * <p>
 * Collects the {@code transform} of every upstream stage, wraps them together with the
 * key functions, the aggregate operation and the output mapping function in a
 * {@code GroupTransform}, connects that transform to its upstream transforms in the
 * pipeline, and returns a new {@code BatchStageImpl} for it.
 *
 * @param aggrOp        the aggregate operation passed to the {@code GroupTransform}
 * @param mapToOutputFn the function passed to the {@code GroupTransform} that takes a key
 *                      and a result and produces the output item
 * @return a new batch stage backed by the created transform
 */
@SuppressWarnings("unchecked")
public <A, R, OUT> BatchStage<OUT> buildBatch(@Nonnull AggregateOperation<A, ? extends R> aggrOp, @Nonnull DistributedBiFunction<? super K, ? super R, OUT> mapToOutputFn) {
    List<Transform> upstreams = upstreamStages
            .stream()
            .map(stage -> stage.transform)
            .collect(toList());
    Transform groupTransform = new GroupTransform<>(upstreams, keyFns, aggrOp, mapToOutputFn);
    pipelineImpl.connect(upstreams, groupTransform);
    return new BatchStageImpl<>(groupTransform, pipelineImpl);
}
Also used : WindowGroupTransform(com.hazelcast.jet.impl.pipeline.transform.WindowGroupTransform) DistributedBiFunction(com.hazelcast.jet.function.DistributedBiFunction) ArrayList(java.util.ArrayList) BatchStage(com.hazelcast.jet.pipeline.BatchStage) Tag.tag(com.hazelcast.jet.datamodel.Tag.tag) KeyedWindowResultFunction(com.hazelcast.jet.function.KeyedWindowResultFunction) AggregateOperation(com.hazelcast.jet.aggregate.AggregateOperation) WindowGroupAggregateBuilder(com.hazelcast.jet.pipeline.WindowGroupAggregateBuilder) StageWithGroupingAndWindow(com.hazelcast.jet.pipeline.StageWithGroupingAndWindow) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) GroupTransform(com.hazelcast.jet.impl.pipeline.transform.GroupTransform) Nonnull(javax.annotation.Nonnull) StreamStage(com.hazelcast.jet.pipeline.StreamStage) WindowDefinition(com.hazelcast.jet.pipeline.WindowDefinition) StageWithGrouping(com.hazelcast.jet.pipeline.StageWithGrouping) JetEventFunctionAdapter.adaptAggregateOperation(com.hazelcast.jet.impl.pipeline.JetEventFunctionAdapter.adaptAggregateOperation) Tag(com.hazelcast.jet.datamodel.Tag) Transform(com.hazelcast.jet.impl.pipeline.transform.Transform) ADAPT_TO_JET_EVENT(com.hazelcast.jet.impl.pipeline.ComputeStageImplBase.ADAPT_TO_JET_EVENT) List(java.util.List) ComputeStageImplBase.ensureJetEvents(com.hazelcast.jet.impl.pipeline.ComputeStageImplBase.ensureJetEvents) Collectors.toList(java.util.stream.Collectors.toList) JetEventFunctionAdapter.adaptKeyFn(com.hazelcast.jet.impl.pipeline.JetEventFunctionAdapter.adaptKeyFn) StreamStageWithGrouping(com.hazelcast.jet.pipeline.StreamStageWithGrouping) GroupAggregateBuilder(com.hazelcast.jet.pipeline.GroupAggregateBuilder) WindowGroupTransform(com.hazelcast.jet.impl.pipeline.transform.WindowGroupTransform) GroupTransform(com.hazelcast.jet.impl.pipeline.transform.GroupTransform) WindowGroupTransform(com.hazelcast.jet.impl.pipeline.transform.WindowGroupTransform) 
GroupTransform(com.hazelcast.jet.impl.pipeline.transform.GroupTransform) Transform(com.hazelcast.jet.impl.pipeline.transform.Transform)

Example 3 with DistributedBiFunction

use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet-reference-manual by hazelcast.

The class TfIdfCoreApi, method createDag.

/**
 * Builds and returns the DAG of this TF-IDF example.
 * <p>
 * Vertices: {@code stopword-source}, {@code doc-source}, {@code doc-count},
 * {@code doc-lines}, {@code tokenize}, {@code tf}, {@code tf-idf} and {@code sink}.
 * The four vertices {@code stopword-source}, {@code doc-source}, {@code doc-count} and
 * {@code doc-lines} are limited to local parallelism 1.
 * <p>
 * Edges:
 * <ul>
 *   <li>{@code stopword-source -> tokenize}: broadcast, priority -1</li>
 *   <li>{@code doc-lines -> tokenize} (destination ordinal 1)</li>
 *   <li>{@code doc-source -> doc-count}: distributed broadcast</li>
 *   <li>{@code doc-source} (source ordinal 1) {@code -> doc-lines}</li>
 *   <li>{@code tokenize -> tf}: partitioned by the whole item</li>
 *   <li>{@code doc-count -> tf-idf}: broadcast, priority -1</li>
 *   <li>{@code tf -> tf-idf} (destination ordinal 1): distributed, partitioned by
 *       {@code byWord}</li>
 *   <li>{@code tf-idf -> sink}</li>
 * </ul>
 * NOTE(review): the {@code // tag::…[]} / {@code // end::…[]} comments look like AsciiDoc
 * include tags, presumably consumed by the reference-manual build. They are kept exactly
 * where they were; confirm before moving any of them.
 *
 * @return the fully wired DAG
 */
private static DAG createDag() {
    // Partitioning key for the tf -> tf-idf edge: the String value of the entry that is the item's key.
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord = item -> item.getKey().getValue();
    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
    dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
    dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
    dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(flatMapP((Entry<Long, String> e) -> traverseStream(docLines("books/" + e.getValue()).map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
    dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
    dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
    dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
    dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]
    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);
    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag
            .edge(between(docSource, docCount).distributed().broadcast())
            .edge(from(docSource, 1).to(docLines))
            .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
            .edge(between(docCount, tfidf).broadcast().priority(-1))
            .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
            .edge(between(tfidf, sink));
}
Also used : AbstractProcessor(com.hazelcast.jet.core.AbstractProcessor) AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) Traverser(com.hazelcast.jet.Traverser) Arrays(java.util.Arrays) URISyntaxException(java.net.URISyntaxException) Processors(com.hazelcast.jet.core.processor.Processors) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) HashMap(java.util.HashMap) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) DistributedBiFunction(com.hazelcast.jet.function.DistributedBiFunction) DistributedFunctions.wholeItem(com.hazelcast.jet.function.DistributedFunctions.wholeItem) ArrayList(java.util.ArrayList) Collections.singletonList(java.util.Collections.singletonList) Traversers.lazy(com.hazelcast.jet.Traversers.lazy) Traversers.traverseIterable(com.hazelcast.jet.Traversers.traverseIterable) Util.entry(com.hazelcast.jet.Util.entry) Map(java.util.Map) Processors.nonCooperativeP(com.hazelcast.jet.core.processor.Processors.nonCooperativeP) Edge.from(com.hazelcast.jet.core.Edge.from) DAG(com.hazelcast.jet.core.DAG) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Processors.flatMapP(com.hazelcast.jet.core.processor.Processors.flatMapP) Nonnull(javax.annotation.Nonnull) Collectors.toSet(java.util.stream.Collectors.toSet) Files(java.nio.file.Files) Set(java.util.Set) IOException(java.io.IOException) Vertex(com.hazelcast.jet.core.Vertex) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) Processors.aggregateByKeyP(com.hazelcast.jet.core.processor.Processors.aggregateByKeyP) SinkProcessors(com.hazelcast.jet.core.processor.SinkProcessors) Entry(java.util.Map.Entry) HASH_CODE(com.hazelcast.jet.core.Partitioner.HASH_CODE) Pattern(java.util.regex.Pattern) Util(com.hazelcast.jet.Util) Edge.between(com.hazelcast.jet.core.Edge.between) Vertex(com.hazelcast.jet.core.Vertex) 
Entry(java.util.Map.Entry) DAG(com.hazelcast.jet.core.DAG)

Aggregations

DistributedBiFunction (com.hazelcast.jet.function.DistributedBiFunction)3 DistributedFunction (com.hazelcast.jet.function.DistributedFunction)3 List (java.util.List)3 Collectors.toList (java.util.stream.Collectors.toList)3 Vertex (com.hazelcast.jet.core.Vertex)2 Tag (com.hazelcast.jet.datamodel.Tag)2 ArrayList (java.util.ArrayList)2 Nonnull (javax.annotation.Nonnull)2 Traverser (com.hazelcast.jet.Traverser)1 Traversers.lazy (com.hazelcast.jet.Traversers.lazy)1 Traversers.traverseIterable (com.hazelcast.jet.Traversers.traverseIterable)1 Traversers.traverseStream (com.hazelcast.jet.Traversers.traverseStream)1 Util (com.hazelcast.jet.Util)1 Util.entry (com.hazelcast.jet.Util.entry)1 AggregateOperation (com.hazelcast.jet.aggregate.AggregateOperation)1 AggregateOperations.counting (com.hazelcast.jet.aggregate.AggregateOperations.counting)1 AbstractProcessor (com.hazelcast.jet.core.AbstractProcessor)1 DAG (com.hazelcast.jet.core.DAG)1 Edge.between (com.hazelcast.jet.core.Edge.between)1 Edge.from (com.hazelcast.jet.core.Edge.from)1