Use of org.apache.beam.sdk.extensions.euphoria.core.client.operator.base.Operator in the Apache Beam project.
Class DocumentationExamplesTest, method wordCountExample.
/**
 * Word-count example: splits text lines into words, counts how often each word occurs, formats
 * every count as a {@code word: count} string and hands the result to {@code TextIO.write()}.
 *
 * <p>The test is ignored so that running the suite does not write output files.
 */
@Ignore("We do not want to actually write output files from this test.")
@Test
public void wordCountExample() {
  Pipeline wordCountPipeline = Pipeline.create(PipelineOptionsFactory.create());

  // Kryo acts as the fallback coder for this pipeline.
  KryoCoderProvider.of().registerTo(wordCountPipeline);

  // The input is produced by a regular Beam transform.
  PCollection<String> textLines =
      wordCountPipeline
          .apply(Create.of(textLineByLine))
          .setTypeDescriptor(TypeDescriptor.of(String.class));

  // FlatMap sees one element at a time and may emit any number of outputs for it;
  // here every line is split on whitespace and each piece is emitted as a word.
  PCollection<String> tokens =
      FlatMap.named("TOKENIZER")
          .of(textLines)
          .using(
              (String line, Collector<String> context) ->
                  Splitter.onPattern("\\s+").split(line).forEach(context::collect))
          .output();

  // CountByKey groups equal keys (the word itself) and emits the number of occurrences
  // of each key found in the tokenized data set.
  PCollection<KV<String, Long>> wordCounts =
      CountByKey.named("COUNT")
          .of(tokens)
          .keyBy(word -> word)
          .output();

  // Render each (word, count) pair as a single text line.
  PCollection<String> formatted =
      MapElements.named("FORMAT")
          .of(wordCounts)
          .using(entry -> entry.getKey() + ": " + entry.getValue())
          .output();

  // Back to plain Beam: store the formatted counts in a text file.
  formatted.apply(TextIO.write().to("counted_words"));

  wordCountPipeline.run();
}
Use of org.apache.beam.sdk.extensions.euphoria.core.client.operator.base.Operator in the Apache Beam project.
Class DocumentationExamplesTest, method testGenericTranslatorProvider.
/**
 * Builds a {@code GenericTranslatorProvider} using the three registration variants shown
 * below (by operator class, by class plus predicate, and by predicate only) and checks that
 * the builder returns a non-null provider.
 */
@Test
public void testGenericTranslatorProvider() {
  GenericTranslatorProvider translatorProvider =
      GenericTranslatorProvider.newBuilder()
          // Variant 1: register by operator class.
          .register(FlatMap.class, new FlatMapTranslator<>())
          // Variant 2: register by class and predicate; the predicate accepts joins whose
          // name (empty string when absent) starts with "broadcast", ignoring case.
          .register(
              Join.class,
              (Join join) ->
                  ((Optional<String>) join.getName())
                      .orElse("")
                      .toLowerCase()
                      .startsWith("broadcast"),
              new BroadcastHashJoinTranslator<>())
          // Variant 3: register by predicate only.
          .register(
              operator -> operator instanceof CompositeOperator,
              new CompositeOperatorTranslator<>())
          .build();

  Assert.assertNotNull(translatorProvider);
}
Aggregations