Use of org.apache.beam.sdk.extensions.euphoria.core.client.operator.ReduceByKey in project beam by apache.
The class ReduceByKeyTranslator, method translate().
@Override
public PCollection<KV<KeyT, OutputT>> translate(
    ReduceByKey<InputT, KeyT, ValueT, ?, OutputT> operator, PCollectionList<InputT> inputs) {
  // TODO: Could we even do values sorting in Beam? And do we want it?
  checkState(!operator.getValueComparator().isPresent(), "Values sorting is not supported.");
  final UnaryFunction<InputT, KeyT> keyExtractor = operator.getKeyExtractor();
  final UnaryFunction<InputT, ValueT> valueExtractor = operator.getValueExtractor();
  final PCollection<InputT> input =
      operator.getWindow()
          .map(window -> PCollectionLists.getOnlyElement(inputs).apply(window))
          .orElseGet(() -> PCollectionLists.getOnlyElement(inputs));
  // ~ create key & value extractor
  final MapElements<InputT, KV<KeyT, ValueT>> extractor =
      MapElements.via(new KeyValueExtractor<>(keyExtractor, valueExtractor));
  final PCollection<KV<KeyT, ValueT>> extracted =
      input.apply("extract-keys", extractor)
          .setTypeDescriptor(TypeDescriptors.kvs(
              TypeAwareness.orObjects(operator.getKeyType()),
              TypeAwareness.orObjects(operator.getValueType())));
  final AccumulatorProvider accumulators =
      new LazyAccumulatorProvider(AccumulatorProvider.of(inputs.getPipeline()));
  if (operator.isCombinable()) {
    // if the operator is combinable, we can process it in a more efficient way
    @SuppressWarnings("unchecked")
    final PCollection combined;
    if (operator.isCombineFnStyle()) {
      combined = extracted.apply("combine", Combine.perKey(asCombineFn(operator)));
    } else {
      combined = extracted.apply("combine", Combine.perKey(asCombiner(
          operator.getReducer(), accumulators, operator.getName().orElse(null))));
    }
    @SuppressWarnings("unchecked")
    final PCollection<KV<KeyT, OutputT>> cast = (PCollection) combined;
    return cast.setTypeDescriptor(operator.getOutputType().orElseThrow(
        () -> new IllegalStateException("Unable to infer output type descriptor.")));
  }
  return extracted
      .apply("group", GroupByKey.create())
      .setTypeDescriptor(TypeDescriptors.kvs(
          TypeAwareness.orObjects(operator.getKeyType()),
          TypeDescriptors.iterables(TypeAwareness.orObjects(operator.getValueType()))))
      .apply("reduce", ParDo.of(new ReduceDoFn<>(
          operator.getReducer(), accumulators, operator.getName().orElse(null))))
      .setTypeDescriptor(operator.getOutputType().orElseThrow(
          () -> new IllegalStateException("Unable to infer output type descriptor.")));
}
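Which branch the translator takes depends on how the ReduceByKey operator was built: a combinable reducer (e.g. one supplied via combineBy) goes through Combine.perKey, while a general reduceBy reducer falls back to GroupByKey followed by ParDo(ReduceDoFn). The sketch below illustrates the two styles; it is not part of the Beam sources, the helper method names and the input collection are hypothetical, and it assumes Euphoria's Sums utility plus the imports already used on this page.

// Combinable style: an associative/commutative combiner, which the translator above
// handles in the operator.isCombinable() branch via Combine.perKey.
static PCollection<KV<Integer, Long>> countPerParity(PCollection<Integer> input) {
  return ReduceByKey.named("count-per-parity")
      .of(input)
      .keyBy(e -> e % 2)
      .valueBy(e -> 1L)
      .combineBy(Sums.ofLongs())
      .output();
}

// General style: an arbitrary reducer over the grouped value stream, which the translator
// above handles in the fallback branch via GroupByKey + ParDo(ReduceDoFn).
static PCollection<KV<Integer, Long>> maxPerParity(PCollection<Integer> input) {
  return ReduceByKey.named("max-per-parity")
      .of(input)
      .keyBy(e -> e % 2)
      .valueBy(e -> (long) e)
      .reduceBy((Stream<Long> values, Collector<Long> ctx) ->
          ctx.collect(values.max(Long::compare).orElse(0L)))
      .output();
}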
Use of org.apache.beam.sdk.extensions.euphoria.core.client.operator.ReduceByKey in project beam by apache.
The class BeamMetricsTranslationTest, method testBeamMetricsTranslation().
/**
 * Tests metrics counters on {@link ReduceByKey} and {@link MapElements} operators. Flow:
 *
 * <ol>
 *   <li>step RBK: increment for all keys, add its value to a histogram, collect even numbers.
 *   <li>step MapElements: increment for every element, add its value to a histogram, map to
 *       integer.
 *   <li>step test MapElements with default operator name: increment by the value of its element,
 *       add the value of its element to the histogram twice.
 * </ol>
 */
@Test
public void testBeamMetricsTranslation() {
  final PCollection<Integer> input =
      testPipeline.apply("input", Create.of(1, 2, 3, 4, 5).withType(TypeDescriptors.integers()));
  final String counterName1 = "counter1";
  final String operatorName1 = "count_elements_and_save_even_numbers";
  final PCollection<KV<Integer, Integer>> kvInput =
      ReduceByKey.named(operatorName1)
          .of(input)
          .keyBy(e -> e)
          .reduceBy((Stream<Integer> list, Collector<Integer> coll) ->
              list.forEach(i -> {
                coll.getCounter(counterName1).increment();
                coll.getHistogram(counterName1).add(i);
                if (i % 2 == 0) {
                  coll.collect(i);
                }
              }))
          .output();
  final String counterName2 = "counter2";
  final String operatorName2 = "map_to_integer";
  final String operatorName3 = "map_elements";
  // kvInput = [<2,2>, <4,4>]
  final PCollection<Integer> mapElementsOutput =
      MapElements.named(operatorName2)
          .of(kvInput)
          .using((kv, context) -> {
            final Integer value = kv.getValue();
            context.getCounter(counterName2).increment();
            context.getHistogram(counterName2).add(value);
            return value;
          })
          .output();
  // mapElementsOutput = [2, 4]
  final PCollection<Integer> output =
      MapElements.named(operatorName3)
          .of(mapElementsOutput)
          .using((value, context) -> {
            context.getCounter(counterName2).increment(value);
            context.getHistogram(counterName2).add(value, 2);
            return value;
          })
          .output();
  PAssert.that(output).containsInAnyOrder(2, 4);
  final PipelineResult result = testPipeline.run();
  result.waitUntilFinish();
  final MetricQueryResults metricQueryResults =
      result.metrics().queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.inNamespace(operatorName1))
              .addNameFilter(MetricNameFilter.inNamespace(operatorName2))
              .addNameFilter(MetricNameFilter.inNamespace(operatorName3))
              .build());
  testStep1Metrics(metricQueryResults, counterName1, operatorName1);
  testStep2Metrics(metricQueryResults, counterName2, operatorName2);
  testStep3WithDefaultOperatorName(metricQueryResults, counterName2, operatorName3);
}
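The testStep1Metrics, testStep2Metrics, and testStep3WithDefaultOperatorName helpers are not shown on this page. Below is a minimal sketch of how such a check could query the MetricQueryResults, not the actual helpers from the Beam test; assertCounterValue is a hypothetical name, assertEquals/fail come from JUnit, and it assumes each Euphoria operator reports its metrics under the operator name as the namespace (as the MetricsFilter above suggests).

// Hypothetical helper: look up a counter by namespace/name and assert its committed value.
private static void assertCounterValue(
    MetricQueryResults results, String namespace, String counterName, long expected) {
  for (MetricResult<Long> counter : results.getCounters()) {
    final MetricName name = counter.getName();
    if (namespace.equals(name.getNamespace()) && counterName.equals(name.getName())) {
      assertEquals(expected, (long) counter.getCommitted());
      return;
    }
  }
  fail("Counter " + namespace + "/" + counterName + " not found");
}

For step 1, for example, counterName1 is incremented once per input element, so a call such as assertCounterValue(metricQueryResults, operatorName1, counterName1, 5) would be the expected check for the five inputs created above.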