Use of org.apache.beam.sdk.extensions.euphoria.core.client.io.Collector in project Beam by Apache.
Class DocumentationExamplesTest, method wordCountExample.
/**
 * Documentation example: a word count assembled from Euphoria operators on a Beam pipeline.
 *
 * <p>Input lines are split on whitespace by a {@code FlatMap}, occurrences of each word are
 * counted by {@code CountByKey}, every pair is rendered as {@code "word: count"} by
 * {@code MapElements}, and the formatted lines are passed to {@code TextIO.write()} with the
 * {@code "counted_words"} target.
 */
@Ignore("We do not want to actually write output files from this test.")
@Test
public void wordCountExample() {
  final Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

  // Kryo serves as the fallback coder for types without an explicit one.
  KryoCoderProvider.of().registerTo(pipeline);

  // Input comes from a plain Beam transform.
  PCollection<String> lines =
      pipeline
          .apply(Create.of(textLineByLine))
          .setTypeDescriptor(TypeDescriptor.of(String.class));

  // FlatMap sees one element at a time and may emit any number of outputs for it;
  // here every whitespace-separated token of a line is emitted as a word.
  PCollection<String> words =
      FlatMap.named("TOKENIZER")
          .of(lines)
          .using(
              (String line, Collector<String> context) ->
                  Splitter.onPattern("\\s+").split(line).forEach(context::collect))
          .output();

  // CountByKey groups equal keys (the word itself) and emits the number of
  // occurrences of each key in 'words'.
  PCollection<KV<String, Long>> counted =
      CountByKey.named("COUNT")
          .of(words)
          .keyBy(w -> w)
          .output();

  // Render each (word, count) pair as a single text line.
  PCollection<String> output =
      MapElements.named("FORMAT")
          .of(counted)
          .using(p -> p.getKey() + ": " + p.getValue())
          .output();

  // Back to a plain Beam transformation: store the formatted counts in a text file.
  output.apply(TextIO.write().to("counted_words"));

  pipeline.run();
}
Use of org.apache.beam.sdk.extensions.euphoria.core.client.io.Collector in project Beam by Apache.
Class JoinTest, method testBuild_OptionalWindowing.
/**
 * Builds a {@code Join} whose windowing is supplied through {@code applyIf(true, ...)} and
 * checks that the resulting operator reports that window function, trigger and accumulation
 * mode.
 */
@Test
public void testBuild_OptionalWindowing() {
  // The same one-hour size is used to configure the window and to verify it afterwards.
  final org.joda.time.Duration oneHour = org.joda.time.Duration.standardHours(1);

  final Pipeline pipeline = TestUtils.createTestPipeline();
  final PCollection<String> left =
      TestUtils.createMockDataset(pipeline, TypeDescriptors.strings());
  final PCollection<String> right =
      TestUtils.createMockDataset(pipeline, TypeDescriptors.strings());

  final PCollection<KV<Integer, String>> joined =
      Join.named("Join1")
          .of(left, right)
          .by(String::length, String::length)
          .using((String l, String r, Collector<String> c) -> c.collect(l + r))
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(oneHour))
                      .triggeredBy(AfterWatermark.pastEndOfWindow())
                      .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES))
          .output();

  final Join joinOperator = (Join) TestUtils.getProducer(joined);
  assertTrue(joinOperator.getWindow().isPresent());

  final Window<?> windowTransform = (Window) joinOperator.getWindow().get();
  assertEquals(FixedWindows.of(oneHour), windowTransform.getWindowFn());
  assertEquals(
      AfterWatermark.pastEndOfWindow(), WindowDesc.of(windowTransform).getTrigger());
  assertEquals(
      AccumulationMode.DISCARDING_FIRED_PANES,
      WindowDesc.of(windowTransform).getAccumulationMode());
}
Use of org.apache.beam.sdk.extensions.euphoria.core.client.io.Collector in project Beam by Apache.
Class BeamMetricsTranslationTest, method testBeamMetricsTranslation.
/**
 * Exercises counters and histograms reported from {@link ReduceByKey} and {@link MapElements}
 * operators. The pipeline has three steps:
 *
 * <ol>
 *   <li>{@code ReduceByKey} named {@code "count_elements_and_save_even_numbers"}: for every
 *       value it increments {@code "counter1"}, adds the value to the histogram of the same
 *       name, and collects only even values.
 *   <li>{@code MapElements} named {@code "map_to_integer"}: for every pair it increments
 *       {@code "counter2"}, adds the pair's value to the histogram of the same name, and maps
 *       the pair to that value.
 *   <li>{@code MapElements} named {@code "map_elements"}: for every element it increments
 *       {@code "counter2"} by the element's value and calls {@code add(value, 2)} on the
 *       histogram of the same name.
 * </ol>
 *
 * <p>After the pipeline finishes, metrics filtered by the three operator names are passed to
 * the per-step verification helpers.
 */
@Test
public void testBeamMetricsTranslation() {
  // Metric and operator names used both to build the pipeline and to query the results.
  final String counterName1 = "counter1";
  final String counterName2 = "counter2";
  final String operatorName1 = "count_elements_and_save_even_numbers";
  final String operatorName2 = "map_to_integer";
  final String operatorName3 = "map_elements";

  final PCollection<Integer> input =
      testPipeline.apply(
          "input", Create.of(1, 2, 3, 4, 5).withType(TypeDescriptors.integers()));

  final PCollection<KV<Integer, Integer>> evenNumbers =
      ReduceByKey.named(operatorName1)
          .of(input)
          .keyBy(e -> e)
          .reduceBy(
              (Stream<Integer> values, Collector<Integer> collector) ->
                  values.forEach(
                      number -> {
                        collector.getCounter(counterName1).increment();
                        collector.getHistogram(counterName1).add(number);
                        if (number % 2 == 0) {
                          collector.collect(number);
                        }
                      }))
          .output();

  // evenNumbers = [<2,2>, <4,4>]
  final PCollection<Integer> mapElementsOutput =
      MapElements.named(operatorName2)
          .of(evenNumbers)
          .using(
              (kv, context) -> {
                final Integer value = kv.getValue();
                context.getCounter(counterName2).increment();
                context.getHistogram(counterName2).add(value);
                return value;
              })
          .output();

  // mapElementsOutput = [2,4]
  final PCollection<Integer> output =
      MapElements.named(operatorName3)
          .of(mapElementsOutput)
          .using(
              (value, context) -> {
                context.getCounter(counterName2).increment(value);
                context.getHistogram(counterName2).add(value, 2);
                return value;
              })
          .output();

  PAssert.that(output).containsInAnyOrder(2, 4);

  final PipelineResult result = testPipeline.run();
  result.waitUntilFinish();

  // Query only the metrics reported under the three operator namespaces.
  final MetricsFilter operatorMetricsFilter =
      MetricsFilter.builder()
          .addNameFilter(MetricNameFilter.inNamespace(operatorName1))
          .addNameFilter(MetricNameFilter.inNamespace(operatorName2))
          .addNameFilter(MetricNameFilter.inNamespace(operatorName3))
          .build();
  final MetricQueryResults metricQueryResults =
      result.metrics().queryMetrics(operatorMetricsFilter);

  testStep1Metrics(metricQueryResults, counterName1, operatorName1);
  testStep2Metrics(metricQueryResults, counterName2, operatorName2);
  testStep3WithDefaultOperatorName(metricQueryResults, counterName2, operatorName3);
}
Use of org.apache.beam.sdk.extensions.euphoria.core.client.io.Collector in project Beam by Apache.
Class DocumentationExamplesTest, method metricsAndAccumulatorsSection.
/**
 * Documentation example showing how user code reaches a counter through the context object
 * in both a {@code FlatMap} functor and a {@code MapElements} functor.
 *
 * <p>The operators are only constructed here; this method does not run the pipeline.
 */
@Test
public void metricsAndAccumulatorsSection() {
  final Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());
  PCollection<String> dataset = pipeline.apply(Create.of("a", "x"));

  // FlatMap: bump the counter, then forward the element unchanged.
  FlatMap.named("FlatMap1")
      .of(dataset)
      .using(
          (String value, Collector<String> context) -> {
            context.getCounter("my-counter").increment();
            context.collect(value);
          })
      .output();

  // MapElements: the two-argument functor form exposes the same counter access.
  MapElements.named("MapThem")
      .of(dataset)
      .using(
          (value, context) -> {
            // use simple counter
            context.getCounter("my-counter").increment();
            return value.toLowerCase();
          })
      .output();
}
Aggregations