Use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet by hazelcast.
From the class HashJoinTransform, method addToDag.
//          ---------          ----------           ----------
//         | primary |        | joined-1 |         | joined-2 |
//          ---------          ----------           ----------
//              |                   |                    |
//              |              distributed          distributed
//              |               broadcast            broadcast
//              |                   v                    v
//              |             -------------        -------------
//              |            | collector-1 |      | collector-2 |
//              |             -------------        -------------
//              |                   |                    |
//              |                 local                local
//            local             broadcast            broadcast
//           unicast           prioritized          prioritized
//          ordinal 0           ordinal 1            ordinal 2
//              \                   |                    |
//               ----------------\  |  /----------------/
//                                v v v
//                              --------
//                             | joiner |
//                              --------
@Override
@SuppressWarnings("unchecked")
public void addToDag(Planner p) {
    String namePrefix = p.uniqueVertexName(this.name(), "");
    PlannerVertex primary = p.xform2vertex.get(this.upstream().get(0));
    List keyFns = this.clauses.stream()
                              .map(JoinClause::leftKeyFn)
                              .collect(toList());
    List<Tag> tags = this.tags;
    DistributedBiFunction mapToOutputBiFn = this.mapToOutputBiFn;
    DistributedTriFunction mapToOutputTriFn = this.mapToOutputTriFn;
    Vertex joiner = p.addVertex(this, namePrefix + "-joiner", localParallelism(),
            () -> new HashJoinP<>(keyFns, tags, mapToOutputBiFn, mapToOutputTriFn)).v;
    // The primary stream enters the joiner over a local unicast edge at ordinal 0
    p.dag.edge(from(primary.v, primary.nextAvailableOrdinal()).to(joiner, 0));
    String collectorName = namePrefix + "-collector";
    int collectorOrdinal = 1;
    for (Transform fromTransform : tailList(this.upstream())) {
        PlannerVertex fromPv = p.xform2vertex.get(fromTransform);
        JoinClause<?, ?, ?, ?> clause = this.clauses.get(collectorOrdinal - 1);
        DistributedFunction<Object, Object> getKeyFn =
                (DistributedFunction<Object, Object>) clause.rightKeyFn();
        DistributedFunction<Object, Object> projectFn =
                (DistributedFunction<Object, Object>) clause.rightProjectFn();
        // Each joined stream is gathered into a single-parallelism collector vertex
        Vertex collector = p.dag.newVertex(collectorName + collectorOrdinal,
                () -> new HashJoinCollectP(getKeyFn, projectFn));
        collector.localParallelism(1);
        p.dag.edge(from(fromPv.v, fromPv.nextAvailableOrdinal())
                .to(collector, 0)
                .distributed().broadcast());
        // Priority -1 makes the joiner consume the whole hash table
        // before any items arrive from the primary stream
        p.dag.edge(from(collector, 0)
                .to(joiner, collectorOrdinal)
                .broadcast().priority(-1));
        collectorOrdinal++;
    }
}
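For context, this transform is what the planner produces when a batch stage's hashJoin is used, with the DistributedBiFunction serving as the mapToOutputBiFn above. A minimal sketch of that entry point, assuming the pre-4.0 Jet Pipeline API; the Trade and Broker types and the "trades" and "brokers" source names are hypothetical placeholders.

Pipeline p = Pipeline.create();
BatchStage<Trade> trades = p.drawFrom(Sources.<Trade>list("trades"));
BatchStage<Entry<Integer, Broker>> brokers = p.drawFrom(Sources.<Integer, Broker>map("brokers"));
// The DistributedBiFunction combines each primary item with its joined match
BatchStage<Tuple2<Trade, Broker>> joined = trades.hashJoin(
        brokers,
        JoinClause.joinMapEntries(Trade::brokerId),
        Tuple2::tuple2);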
Use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet by hazelcast.
From the class GrAggBuilder, method buildBatch.
@SuppressWarnings("unchecked")
public <A, R, OUT> BatchStage<OUT> buildBatch(
        @Nonnull AggregateOperation<A, ? extends R> aggrOp,
        @Nonnull DistributedBiFunction<? super K, ? super R, OUT> mapToOutputFn) {
    List<Transform> upstreamTransforms = upstreamStages.stream()
                                                       .map(s -> s.transform)
                                                       .collect(toList());
    // A single GroupTransform co-groups all upstream stages by key, aggregates
    // with aggrOp, and maps each (key, result) pair to an output item
    Transform transform = new GroupTransform<>(upstreamTransforms, keyFns, aggrOp, mapToOutputFn);
    pipelineImpl.connect(upstreamTransforms, transform);
    return new BatchStageImpl<>(transform, pipelineImpl);
}
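The mapToOutputFn that buildBatch accepts is the DistributedBiFunction that turns each grouping key and its aggregation result into an output item. A minimal sketch of that function's shape, assuming a Jet version whose keyed aggregate offers a mapToOutputFn overload, as buildBatch above suggests; the "words" list name is a hypothetical placeholder.

Pipeline p = Pipeline.create();
BatchStage<String> words = p.drawFrom(Sources.<String>list("words"));
// mapToOutputFn receives (key, aggregation result) and produces the output item
BatchStage<String> counts = words
        .groupingKey(wholeItem())
        .aggregate(counting(), (word, count) -> word + ": " + count);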
Use of com.hazelcast.jet.function.DistributedBiFunction in project hazelcast-jet-reference-manual by hazelcast.
From the class TfIdfCoreApi, method createDag.
private static DAG createDag() {
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord =
            item -> item.getKey().getValue();
    DistributedBiFunction<Long, Object, Long> counter = (count, x) -> count + 1;
    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
            dag.newVertex("stopword-source", StopwordsP::new);
    // end::s2[]
    Vertex docSource = // tag::s1[]
            dag.newVertex("doc-source", readMapP(DOCID_NAME));
    // end::s1[]
    Vertex docCount = // tag::s4[]
            dag.newVertex("doc-count", Processors.aggregateP(counting()));
    // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) -> traverseStream(
                    docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
            dag.newVertex("tokenize", TokenizeP::new);
    // end::s6[]
    Vertex tf = // tag::s9[]
            dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    // end::s9[]
    Vertex tfidf = // tag::s10[]
            dag.newVertex("tf-idf", TfIdfP::new);
    // end::s10[]
    Vertex sink = // tag::s12[]
            dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
    // end::s12[]
    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);
    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag.edge(between(docSource, docCount).distributed().broadcast())
              .edge(from(docSource, 1).to(docLines))
              .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
              .edge(between(docCount, tfidf).broadcast().priority(-1))
              .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
              .edge(between(tfidf, sink));
}
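A minimal sketch of submitting this DAG for execution, assuming a single local Jet member and that the DOCID_NAME map has already been populated with documentId-to-book-name entries:

JetInstance jet = Jet.newJetInstance();
try {
    // Blocks until the batch job completes
    jet.newJob(createDag()).join();
} finally {
    Jet.shutdownAll();
}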