Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class TransformTranslator, method combineGlobally.
private static <InputT, AccumT, OutputT> TransformEvaluator<Combine.Globally<InputT, OutputT>> combineGlobally() {
return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() {
@Override
public void evaluate(Combine.Globally<InputT, OutputT> transform, EvaluationContext context) {
final PCollection<InputT> input = context.getInput(transform);
final Coder<InputT> iCoder = context.getInput(transform).getCoder();
final Coder<OutputT> oCoder = context.getOutput(transform).getCoder();
final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
@SuppressWarnings("unchecked") final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn = (CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn());
final WindowedValue.FullWindowedValueCoder<OutputT> wvoCoder = WindowedValue.FullWindowedValueCoder.of(oCoder, windowingStrategy.getWindowFn().windowCoder());
final SparkRuntimeContext runtimeContext = context.getRuntimeContext();
final boolean hasDefault = transform.isInsertDefault();
final SparkGlobalCombineFn<InputT, AccumT, OutputT> sparkCombineFn =
    new SparkGlobalCombineFn<>(
        combineFn,
        runtimeContext,
        TranslationUtils.getSideInputs(transform.getSideInputs(), context),
        windowingStrategy);
final Coder<AccumT> aCoder;
try {
aCoder = combineFn.getAccumulatorCoder(runtimeContext.getCoderRegistry(), iCoder);
} catch (CannotProvideCoderException e) {
throw new IllegalStateException("Could not determine coder for accumulator", e);
}
@SuppressWarnings("unchecked") JavaRDD<WindowedValue<InputT>> inRdd = ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
JavaRDD<WindowedValue<OutputT>> outRdd;
Optional<Iterable<WindowedValue<AccumT>>> maybeAccumulated = GroupCombineFunctions.combineGlobally(inRdd, sparkCombineFn, iCoder, aCoder, windowingStrategy);
if (maybeAccumulated.isPresent()) {
Iterable<WindowedValue<OutputT>> output = sparkCombineFn.extractOutput(maybeAccumulated.get());
outRdd = context.getSparkContext().parallelize(CoderHelpers.toByteArrays(output, wvoCoder)).map(CoderHelpers.fromByteFunction(wvoCoder));
} else {
// handle empty input RDD, which will naturally skip the entire execution
// as Spark will not run on empty RDDs.
JavaSparkContext jsc = new JavaSparkContext(inRdd.context());
if (hasDefault) {
OutputT defaultValue = combineFn.defaultValue();
outRdd = jsc
    .parallelize(Lists.newArrayList(CoderHelpers.toByteArray(defaultValue, oCoder)))
    .map(CoderHelpers.fromByteFunction(oCoder))
    .map(WindowingHelpers.<OutputT>windowFunction());
} else {
outRdd = jsc.emptyRDD();
}
}
context.putDataset(transform, new BoundedDataset<>(outRdd));
}
@Override
public String toNativeString() {
return "aggregate(..., new <fn>(), ...)";
}
};
}
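For context, the translator above serializes each combined result through a full windowed-value coder (the output value coder paired with the window coder) before re-parallelizing it. Below is a minimal sketch of that byte round trip, assuming the Beam SDK's StringUtf8Coder and CoderUtils helpers; the class name is illustrative and not part of the translator.
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;

public class WindowedValueCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // Same shape of coder the translator builds: value coder + window coder.
    WindowedValue.FullWindowedValueCoder<String> coder =
        WindowedValue.FullWindowedValueCoder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);

    WindowedValue<String> original = WindowedValue.valueInGlobalWindow("hello");
    byte[] bytes = CoderUtils.encodeToByteArray(coder, original);
    WindowedValue<String> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    // The value, window, and pane information all survive the byte round trip.
    System.out.println(decoded.getValue().equals(original.getValue()));
  }
}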
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class TransformTranslator, method flattenPColl.
private static <T> TransformEvaluator<Flatten.PCollections<T>> flattenPColl() {
return new TransformEvaluator<Flatten.PCollections<T>>() {
@SuppressWarnings("unchecked")
@Override
public void evaluate(Flatten.PCollections<T> transform, EvaluationContext context) {
Collection<PValue> pcs = context.getInputs(transform).values();
JavaRDD<WindowedValue<T>> unionRDD;
if (pcs.size() == 0) {
unionRDD = context.getSparkContext().emptyRDD();
} else {
JavaRDD<WindowedValue<T>>[] rdds = new JavaRDD[pcs.size()];
int index = 0;
for (PValue pc : pcs) {
checkArgument(pc instanceof PCollection, "Flatten had non-PCollection value in input: %s of type %s", pc, pc.getClass().getSimpleName());
rdds[index] = ((BoundedDataset<T>) context.borrowDataset(pc)).getRDD();
index++;
}
unionRDD = context.getSparkContext().union(rdds);
}
context.putDataset(transform, new BoundedDataset<>(unionRDD));
}
@Override
public String toNativeString() {
return "sparkContext.union(...)";
}
};
}
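From the pipeline author's side, the transform this evaluator handles is Flatten.pCollections() applied to a PCollectionList; the translator simply unions the backing RDDs. A minimal, hedged sketch of such a pipeline follows (class name and element values are illustrative only).
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

public class FlattenExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<String> first = pipeline.apply("First", Create.of("a", "b"));
    PCollection<String> second = pipeline.apply("Second", Create.of("c"));

    // Flatten.pCollections() is the transform the evaluator above maps to sparkContext.union(...).
    PCollection<String> merged =
        PCollectionList.of(first).and(second).apply(Flatten.pCollections());

    pipeline.run().waitUntilFinish();
  }
}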
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class ProcessBundleHandlerTest, method testCreatingAndProcessingBeamFnDataReadRunner.
@Test
public void testCreatingAndProcessingBeamFnDataReadRunner() throws Exception {
Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
String bundleId = "57L";
String primitiveTransformId = "100L";
long outputId = 101L;
List<WindowedValue<String>> outputValues = new ArrayList<>();
BeamFnApi.Target outputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName(Long.toString(outputId)).build();
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers = ImmutableMultimap.of(outputTarget, outputValues::add);
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers = HashMultimap.create();
List<ThrowingRunnable> startFunctions = new ArrayList<>();
List<ThrowingRunnable> finishFunctions = new ArrayList<>();
BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder().setId("1L").setUrn(DATA_INPUT_URN).setData(Any.pack(REMOTE_PORT)).build();
BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder()
    .setId(primitiveTransformId)
    .setFunctionSpec(functionSpec)
    .putInputs("input", BeamFnApi.Target.List.getDefaultInstance())
    .putOutputs(Long.toString(outputId), BeamFnApi.PCollection.newBuilder().setCoderReference(STRING_CODER_SPEC_ID).build())
    .build();
ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
handler.createConsumersForPrimitiveTransform(
    primitiveTransform,
    Suppliers.ofInstance(bundleId)::get,
    existingConsumers::get,
    newConsumers::put,
    startFunctions::add,
    finishFunctions::add);
verifyZeroInteractions(beamFnDataClient);
CompletableFuture<Void> completionFuture = new CompletableFuture<>();
when(beamFnDataClient.forInboundConsumer(any(), any(), any(), any())).thenReturn(completionFuture);
Iterables.getOnlyElement(startFunctions).run();
verify(beamFnDataClient).forInboundConsumer(
    eq(REMOTE_PORT.getApiServiceDescriptor()),
    eq(KV.of(bundleId, BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName("input").build())),
    eq(STRING_CODER),
    consumerCaptor.capture());
consumerCaptor.getValue().accept(valueInGlobalWindow("TestValue"));
assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
outputValues.clear();
assertThat(newConsumers.keySet(), empty());
completionFuture.complete(null);
Iterables.getOnlyElement(finishFunctions).run();
verifyNoMoreInteractions(beamFnDataClient);
}
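The test feeds the captured inbound consumer with valueInGlobalWindow(...), which is shorthand for a WindowedValue placed in the global window with the minimum timestamp and no pane firing. A small sketch of that equivalence is shown below, assuming the standard WindowedValue factory methods; the class name is illustrative.
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;

public class ValueInGlobalWindowExample {
  public static void main(String[] args) {
    // valueInGlobalWindow(...) is shorthand for a value in the global window
    // with the minimum timestamp and no pane firing information.
    WindowedValue<String> shorthand = WindowedValue.valueInGlobalWindow("TestValue");
    WindowedValue<String> explicit =
        WindowedValue.of(
            "TestValue",
            BoundedWindow.TIMESTAMP_MIN_VALUE,
            GlobalWindow.INSTANCE,
            PaneInfo.NO_FIRING);

    // Expected to print true: both represent the same windowed element.
    System.out.println(shorthand.equals(explicit));
  }
}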
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class ProcessBundleHandlerTest, method testCreatingAndProcessingSource.
@Test
public void testCreatingAndProcessingSource() throws Exception {
Map<String, Message> fnApiRegistry = ImmutableMap.of(LONG_CODER_SPEC_ID, LONG_CODER_SPEC);
String primitiveTransformId = "100L";
long outputId = 101L;
BeamFnApi.Target inputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference("1000L").setName("inputTarget").build();
List<WindowedValue<String>> outputValues = new ArrayList<>();
BeamFnApi.Target outputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName(Long.toString(outputId)).build();
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers = ImmutableMultimap.of(outputTarget, outputValues::add);
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<BoundedSource<Long>>>> newConsumers = HashMultimap.create();
List<ThrowingRunnable> startFunctions = new ArrayList<>();
List<ThrowingRunnable> finishFunctions = new ArrayList<>();
BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder()
    .setId("1L")
    .setUrn(JAVA_SOURCE_URN)
    .setData(Any.pack(BytesValue.newBuilder()
        .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3))))
        .build()))
    .build();
BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder()
    .setId(primitiveTransformId)
    .setFunctionSpec(functionSpec)
    .putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build())
    .putOutputs(Long.toString(outputId), BeamFnApi.PCollection.newBuilder().setCoderReference(LONG_CODER_SPEC_ID).build())
    .build();
ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
handler.createConsumersForPrimitiveTransform(primitiveTransform, Suppliers.ofInstance("57L")::get, existingConsumers::get, newConsumers::put, startFunctions::add, finishFunctions::add);
// This is testing a deprecated way of running sources and should be removed
// once all source definitions are instead propagated along the input edge.
Iterables.getOnlyElement(startFunctions).run();
assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
outputValues.clear();
// Check that when passing a source along as an input, the source is processed.
assertEquals(newConsumers.keySet(), ImmutableSet.of(inputTarget));
Iterables.getOnlyElement(newConsumers.get(inputTarget)).accept(valueInGlobalWindow(CountingSource.upTo(2)));
assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L)));
assertThat(finishFunctions, empty());
}
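The serialized source in this test is CountingSource.upTo(3), an ordinary BoundedSource<Long> that emits 0 through 2. A hedged, pipeline-level sketch of reading the same source directly is given below; the class name is illustrative, and PAssert needs a runner that supports assertions, such as the direct runner.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.values.PCollection;

public class CountingSourceExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // CountingSource.upTo(3) produces 0, 1, 2 -- the same elements the test expects
    // after running the start function for the deserialized source.
    PCollection<Long> counts = pipeline.apply(Read.from(CountingSource.upTo(3)));
    PAssert.that(counts).containsInAnyOrder(0L, 1L, 2L);

    pipeline.run().waitUntilFinish();
  }
}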
Use of org.apache.beam.sdk.util.WindowedValue in project beam by apache.
The class ProcessBundleHandlerTest, method testCreatingAndProcessingBeamFnDataWriteRunner.
@Test
public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
String bundleId = "57L";
String primitiveTransformId = "100L";
long outputId = 101L;
BeamFnApi.Target inputTarget = BeamFnApi.Target.newBuilder().setPrimitiveTransformReference("1000L").setName("inputTarget").build();
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers = ImmutableMultimap.of();
Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> newConsumers = HashMultimap.create();
List<ThrowingRunnable> startFunctions = new ArrayList<>();
List<ThrowingRunnable> finishFunctions = new ArrayList<>();
BeamFnApi.FunctionSpec functionSpec = BeamFnApi.FunctionSpec.newBuilder().setId("1L").setUrn(DATA_OUTPUT_URN).setData(Any.pack(REMOTE_PORT)).build();
BeamFnApi.PrimitiveTransform primitiveTransform = BeamFnApi.PrimitiveTransform.newBuilder()
    .setId(primitiveTransformId)
    .setFunctionSpec(functionSpec)
    .putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build())
    .putOutputs(Long.toString(outputId), BeamFnApi.PCollection.newBuilder().setCoderReference(STRING_CODER_SPEC_ID).build())
    .build();
ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
handler.createConsumersForPrimitiveTransform(
    primitiveTransform,
    Suppliers.ofInstance(bundleId)::get,
    existingConsumers::get,
    newConsumers::put,
    startFunctions::add,
    finishFunctions::add);
verifyZeroInteractions(beamFnDataClient);
List<WindowedValue<String>> outputValues = new ArrayList<>();
AtomicBoolean wasCloseCalled = new AtomicBoolean();
CloseableThrowingConsumer<WindowedValue<String>> outputConsumer = new CloseableThrowingConsumer<WindowedValue<String>>() {
@Override
public void close() throws Exception {
wasCloseCalled.set(true);
}
@Override
public void accept(WindowedValue<String> t) throws Exception {
outputValues.add(t);
}
};
when(beamFnDataClient.forOutboundConsumer(any(), any(), Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(outputConsumer);
Iterables.getOnlyElement(startFunctions).run();
verify(beamFnDataClient).forOutboundConsumer(
    eq(REMOTE_PORT.getApiServiceDescriptor()),
    eq(KV.of(bundleId, BeamFnApi.Target.newBuilder().setPrimitiveTransformReference(primitiveTransformId).setName(Long.toString(outputId)).build())),
    eq(STRING_CODER));
assertEquals(newConsumers.keySet(), ImmutableSet.of(inputTarget));
Iterables.getOnlyElement(newConsumers.get(inputTarget)).accept(valueInGlobalWindow("TestValue"));
assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
outputValues.clear();
assertFalse(wasCloseCalled.get());
Iterables.getOnlyElement(finishFunctions).run();
assertTrue(wasCloseCalled.get());
verifyNoMoreInteractions(beamFnDataClient);
}
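The anonymous CloseableThrowingConsumer above buffers emitted elements and records the close() call that the finish function is expected to trigger. A hypothetical standalone helper with the same accept/close shape is sketched below; it is not a Beam API, just plain Java (implementing AutoCloseable rather than the harness interface) that a test like this could substitute for the anonymous class.
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.util.WindowedValue;

/**
 * A recording stand-in for the outbound consumer the data client would normally return.
 * accept(...) buffers elements and close() marks the flush point that the finish
 * function is expected to trigger. (Hypothetical helper, not a Beam API.)
 */
public class RecordingOutboundConsumer implements AutoCloseable {
  private final List<WindowedValue<String>> buffered = new ArrayList<>();
  private boolean closed = false;

  public void accept(WindowedValue<String> value) {
    if (closed) {
      throw new IllegalStateException("accept() called after close()");
    }
    buffered.add(value);
  }

  @Override
  public void close() {
    closed = true;
  }

  public List<WindowedValue<String>> getBuffered() {
    return buffered;
  }

  public boolean isClosed() {
    return closed;
  }
}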