Use of com.hazelcast.jet.Util in project hazelcast by hazelcast.
From class ReadFilesPTest, method pipeline:
private Pipeline pipeline(String glob) {
    Pipeline p = Pipeline.create();
    p.readFrom(Sources.filesBuilder(directory.getPath())
            .glob(glob == null ? "*" : glob)
            .build(Util::entry))
     .writeTo(Sinks.list(list));
    return p;
}
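Util::entry is the BiFunction that pairs each file name with one line of its text, so every pipeline item is a Map.Entry<String, String>. As a minimal, self-contained sketch of running such a pipeline on an embedded member (the directory, glob, list name, and the Hazelcast 5.x bootstrap are assumptions, not part of the test):

import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.jet.Util;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;

public class RunFilesPipeline {
    public static void main(String[] args) {
        HazelcastInstance hz = Hazelcast.newHazelcastInstance();
        try {
            Pipeline p = Pipeline.create();
            p.readFrom(Sources.filesBuilder("/tmp/books")   // watched directory: an assumption
                    .glob("*.txt")                          // glob: an assumption
                    .build(Util::entry))                    // emits entry(fileName, line)
             .writeTo(Sinks.list("lines"));                 // list name: an assumption
            hz.getJet().newJob(p).join();                   // batch job: completes once all files are read
        } finally {
            hz.shutdown();
        }
    }
}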
Use of com.hazelcast.jet.Util in project hazelcast-jet by hazelcast.
From class StreamFilesPTest, method when_metaSupplier_then_returnsCorrectProcessors:
@Test
public void when_metaSupplier_then_returnsCorrectProcessors() {
    ProcessorMetaSupplier metaSupplier = streamFilesP(workDir.getAbsolutePath(), UTF_8, "*", Util::entry);
    Address a = new Address();
    ProcessorSupplier supplier = metaSupplier.get(singletonList(a)).apply(a);
    supplier.init(new TestProcessorContext());
    assertEquals(1, supplier.get(1).size());
    supplier.close(null);
}
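The pattern this test exercises: a ProcessorMetaSupplier maps each member Address to a ProcessorSupplier, which then creates that member's processors; get(1) requests one processor, matching the assertion. In everyday code the same file-watching source is usually reached through the pipeline API rather than the core processor; a rough sketch, assuming the Jet 4.x names (older releases use drawFrom/drainTo) and an illustrative directory:

private Pipeline watchingPipeline() {
    Pipeline p = Pipeline.create();
    p.readFrom(Sources.filesBuilder("/tmp/watched")   // directory: an assumption
            .glob("*")
            .buildWatcher(Util::entry))               // streaming counterpart of build(): same (fileName, line) entries
     .withoutTimestamps()
     .writeTo(Sinks.logger());
    return p;
}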
Use of com.hazelcast.jet.Util in project hazelcast-jet by hazelcast.
From class StreamFilesP_integrationTest, method buildDag:
private DAG buildDag() {
    DAG dag = new DAG();
    Vertex reader = dag.newVertex("reader",
            streamFilesP(directory.getPath(), UTF_8, "*", Util::entry))
            .localParallelism(1);
    Vertex writer = dag.newVertex("writer", writeListP(list.getName()))
            .localParallelism(1);
    dag.edge(between(reader, writer));
    return dag;
}
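To exercise this DAG, the test would submit it and then append lines to files under directory; because the file watcher is a streaming source, the job runs until it is cancelled. A hedged sketch, assuming a JetInstance named jet as provided by the hazelcast-jet test harness:

// Sketch only: `jet`, `directory`, and `list` come from the test harness, not shown here.
Job job = jet.newJob(buildDag());
// ... append lines to files under `directory`, then assert on `list` contents ...
job.cancel();   // a streaming job never completes on its own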
Use of com.hazelcast.jet.Util in project hazelcast-jet-reference-manual by hazelcast.
From class TfIdfCoreApi, method createDag:
private static DAG createDag() {
    DistributedFunction<Entry<Entry<?, String>, ?>, String> byWord = item -> item.getKey().getValue();
    DistributedBiFunction<Long, Object, Long> counter = (count, x) -> count + 1;
    DAG dag = new DAG();
    Vertex stopwordSource = // tag::s2[]
            dag.newVertex("stopword-source", StopwordsP::new);
            // end::s2[]
    Vertex docSource = // tag::s1[]
            dag.newVertex("doc-source", readMapP(DOCID_NAME));
            // end::s1[]
    Vertex docCount = // tag::s4[]
            dag.newVertex("doc-count", Processors.aggregateP(counting()));
            // end::s4[]
    // tag::s5[]
    Vertex docLines = dag.newVertex("doc-lines", nonCooperativeP(
            flatMapP((Entry<Long, String> e) -> traverseStream(
                    docLines("books/" + e.getValue())
                            .map(line -> entry(e.getKey(), line))))));
    // end::s5[]
    Vertex tokenize = // tag::s6[]
            dag.newVertex("tokenize", TokenizeP::new);
            // end::s6[]
    Vertex tf = // tag::s9[]
            dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
            // end::s9[]
    Vertex tfidf = // tag::s10[]
            dag.newVertex("tf-idf", TfIdfP::new);
            // end::s10[]
    Vertex sink = // tag::s12[]
            dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
            // end::s12[]
    stopwordSource.localParallelism(1);
    docSource.localParallelism(1);
    docCount.localParallelism(1);
    docLines.localParallelism(1);
    // tag::s8[]
    dag.edge(between(stopwordSource, tokenize).broadcast().priority(-1))
       .edge(from(docLines).to(tokenize, 1));
    return dag.edge(between(docSource, docCount).distributed().broadcast())
              .edge(from(docSource, 1).to(docLines))
              .edge(between(tokenize, tf).partitioned(wholeItem(), HASH_CODE))
              .edge(between(docCount, tfidf).broadcast().priority(-1))
              .edge(from(tf).to(tfidf, 1).distributed().partitioned(byWord, HASH_CODE))
              .edge(between(tfidf, sink));
}
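This DAG wires up classic TF-IDF: the tf vertex counts (docId, word) pairs, doc-count counts the documents, and TfIdfP combines the two streams into the inverted index. As a plain-Java sketch of the score it ultimately produces (the method and parameter names here are illustrative, not taken from the manual):

// Illustrative only: conventional TF-IDF, tf(t, d) * log(N / df(t)).
static double tfIdfScore(long termCountInDoc, long totalDocs, long docsWithTerm) {
    double tf = termCountInDoc;                                // per-document count from the "tf" vertex
    double idf = Math.log((double) totalDocs / docsWithTerm);  // document count from "doc-count", df inside TfIdfP
    return tf * idf;
}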