Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
The class StreamingTransformTranslator, method parDo.
private static <InputT, OutputT>
    TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>> parDo() {
  return new TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>>() {

    @Override
    public void evaluate(
        final ParDo.MultiOutput<InputT, OutputT> transform, final EvaluationContext context) {
      final DoFn<InputT, OutputT> doFn = transform.getFn();
      checkArgument(
          !DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable(),
          "Splittable DoFn not yet supported in streaming mode: %s",
          doFn);
      rejectStateAndTimers(doFn);
      final SerializablePipelineOptions options = context.getSerializableOptions();
      final SparkPCollectionView pviews = context.getPViews();
      final WindowingStrategy<?, ?> windowingStrategy =
          context.getInput(transform).getWindowingStrategy();
      Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder();
      Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();
      @SuppressWarnings("unchecked")
      UnboundedDataset<InputT> unboundedDataset =
          (UnboundedDataset<InputT>) context.borrowDataset(transform);
      JavaDStream<WindowedValue<InputT>> dStream = unboundedDataset.getDStream();
      final DoFnSchemaInformation doFnSchemaInformation =
          ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
      final Map<String, PCollectionView<?>> sideInputMapping =
          ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
      final String stepName = context.getCurrentTransform().getFullName();

      JavaPairDStream<TupleTag<?>, WindowedValue<?>> all =
          dStream.transformToPair(
              rdd -> {
                final MetricsContainerStepMapAccumulator metricsAccum =
                    MetricsAccumulator.getInstance();
                final Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
                    TranslationUtils.getSideInputs(
                        transform.getSideInputs().values(),
                        JavaSparkContext.fromSparkContext(rdd.context()),
                        pviews);
                return rdd.mapPartitionsToPair(
                    new MultiDoFnFunction<>(
                        metricsAccum, stepName, doFn, options, transform.getMainOutputTag(),
                        transform.getAdditionalOutputTags().getAll(), inputCoder, outputCoders,
                        sideInputs, windowingStrategy, false, doFnSchemaInformation,
                        sideInputMapping));
              });

      Map<TupleTag<?>, PCollection<?>> outputs = context.getOutputs(transform);
      if (outputs.size() > 1) {
        // Caching can cause serialization; we need to encode to bytes.
        // More details in https://issues.apache.org/jira/browse/BEAM-2669
        Map<TupleTag<?>, Coder<WindowedValue<?>>> coderMap =
            TranslationUtils.getTupleTagCoders(outputs);
        all =
            all.mapToPair(TranslationUtils.getTupleTagEncodeFunction(coderMap))
                .cache()
                .mapToPair(TranslationUtils.getTupleTagDecodeFunction(coderMap));
      }
      for (Map.Entry<TupleTag<?>, PCollection<?>> output : outputs.entrySet()) {
        @SuppressWarnings("unchecked")
        JavaPairDStream<TupleTag<?>, WindowedValue<?>> filtered =
            all.filter(new TranslationUtils.TupleTagFilter(output.getKey()));
        // Object is the best we can do since different outputs can have different tags.
        @SuppressWarnings("unchecked")
        JavaDStream<WindowedValue<Object>> values =
            (JavaDStream<WindowedValue<Object>>)
                (JavaDStream<?>) TranslationUtils.dStreamValues(filtered);
        context.putDataset(
            output.getValue(),
            new UnboundedDataset<>(values, unboundedDataset.getStreamSources()));
      }
    }

    @Override
    public String toNativeString() {
      return "mapPartitions(new <fn>())";
    }
  };
}
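Here the SerializablePipelineOptions obtained from the EvaluationContext is captured by the MultiDoFnFunction closure and shipped to Spark workers; PipelineOptions itself is not Java-serializable, which is what the wrapper exists to work around. A minimal sketch of that pattern follows, with a hypothetical class name (OptionsCarryingFn is not Beam source):

// Minimal sketch, not Beam source: a Spark-style function that captures
// pipeline options safely across a closure boundary.
import java.io.Serializable;

import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;

public class OptionsCarryingFn implements Serializable {
  private final SerializablePipelineOptions options;

  public OptionsCarryingFn(PipelineOptions opts) {
    // The wrapper serializes the options to JSON once on the driver.
    this.options = new SerializablePipelineOptions(opts);
  }

  public void processOnWorker() {
    // get() rebuilds PipelineOptions from JSON on first access after shipping.
    PipelineOptions opts = options.get();
    System.out.println(opts.getJobName());
  }
}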
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
The class DoFnFunction, method initTransient.
/**
 * Initializes the transient variables that were sent over as byte arrays or protocol buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  try {
    SdkComponents components = SdkComponents.create();
    pipelineOptions = new SerializablePipelineOptions(serializedOptions).get();
    DoFnWithExecutionInformation doFnWithExecutionInformation =
        (DoFnWithExecutionInformation)
            SerializableUtils.deserializeFromByteArray(doFnwithExBytes, "Custom Coder Bytes");
    this.doFn = (DoFn<InputT, OutputT>) doFnWithExecutionInformation.getDoFn();
    this.mainOutput = (TupleTag<OutputT>) doFnWithExecutionInformation.getMainOutputTag();
    this.sideInputMapping = doFnWithExecutionInformation.getSideInputMapping();
    this.doFnSchemaInformation = doFnWithExecutionInformation.getSchemaInformation();
    inputCoder =
        (Coder<InputT>)
            SerializableUtils.deserializeFromByteArray(coderBytes, "Custom Coder Bytes");
    windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
    windowingStrategy =
        (WindowingStrategy<?, ?>)
            WindowingStrategyTranslation.fromProto(
                windowStrategyProto.getWindowingStrategy(),
                RehydratedComponents.forComponents(components.toComponents()));
    sideInputs = new HashMap<>();
    for (Map.Entry<String, byte[]> entry : sideInputBytes.entrySet()) {
      windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(entry.getValue());
      sideInputs.put(
          new TupleTag<>(entry.getKey()),
          WindowingStrategyTranslation.fromProto(
              windowStrategyProto.getWindowingStrategy(),
              RehydratedComponents.forComponents(components.toComponents())));
    }
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  outputCoders = new HashMap<>();
  for (Map.Entry<String, byte[]> entry : outputCodersBytes.entrySet()) {
    outputCoders.put(
        new TupleTag<>(entry.getKey()),
        (Coder<?>)
            SerializableUtils.deserializeFromByteArray(entry.getValue(), "Custom Coder Bytes"));
  }
  sideOutputs = new ArrayList<>();
  for (String sideOutput : serializedSideOutputs) {
    sideOutputs.add(new TupleTag<>(sideOutput));
  }
  outputMap = new HashMap<>();
  for (Map.Entry<String, Integer> entry : serializedOutputMap.entrySet()) {
    outputMap.put(new TupleTag<>(entry.getKey()), entry.getValue());
  }
  outputManager = new DoFnOutputManager(this.outputMap);
  this.isInitialized = true;
}
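The isInitialized guard means initTransient() runs at most once per deserialized instance. One way to wire it up, sketched below under the assumption that the class relies on standard Java serialization (the actual DoFnFunction may instead invoke initTransient() lazily from its execution entry points), is a custom readObject hook:

// Sketch only, assuming standard Java serialization of this class.
private void readObject(java.io.ObjectInputStream in)
    throws java.io.IOException, ClassNotFoundException {
  in.defaultReadObject(); // restores the byte-array and proto fields
  initTransient(); // rebuilds doFn, coders, windowing strategies, side inputs
}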
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
The class DoFnFunction, method prepareSerialization.
/**
 * Prepares the DoFnFunction for serialization. Its fields are converted into protobufs and
 * byte arrays, which are converted back into the proper classes during deserialization.
 */
private void prepareSerialization() {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(
          pipelineOptions.as(PortablePipelineOptions.class)));
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
  doFnwithEx =
      ParDoTranslation.translateDoFn(
          this.doFn, mainOutput, sideInputMapping, doFnSchemaInformation, components);
  doFnwithExBytes = doFnwithEx.getPayload().toByteArray();
  outputCodersBytes = new HashMap<>();
  try {
    coderBytes = SerializableUtils.serializeToByteArray(inputCoder);
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
    windowBytes = windowStrategyProto.toByteArray();
    for (Map.Entry<TupleTag<?>, Coder<?>> entry : outputCoders.entrySet()) {
      outputCodersBytes.put(
          entry.getKey().getId(), SerializableUtils.serializeToByteArray(entry.getValue()));
    }
    sideInputBytes = new HashMap<>();
    for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> entry : sideInputs.entrySet()) {
      windowStrategyProto =
          WindowingStrategyTranslation.toMessageProto(entry.getValue(), components);
      sideInputBytes.put(entry.getKey().getId(), windowStrategyProto.toByteArray());
    }
    serializedSideOutputs = new ArrayList<>();
    for (TupleTag<?> sideOutput : sideOutputs) {
      serializedSideOutputs.add(sideOutput.getId());
    }
    serializedOutputMap = new HashMap<>();
    for (Map.Entry<TupleTag<?>, Integer> entry : outputMap.entrySet()) {
      serializedOutputMap.put(entry.getKey().getId(), entry.getValue());
    }
  } catch (IOException e) {
    LOG.info(e.getMessage());
  }
}
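Note the symmetry with initTransient() above: prepareSerialization() stores the options as the JSON string produced by SerializablePipelineOptions.toString(), and initTransient() reverses that with the String constructor plus get(). A self-contained round trip of just the options half (class name and jobName are illustrative):

import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsRoundTrip {
  public static void main(String[] args) {
    PipelineOptions original = PipelineOptionsFactory.create();
    original.setJobName("round-trip-demo"); // arbitrary value for the demo

    // What prepareSerialization() does with the options:
    String serialized = new SerializablePipelineOptions(original).toString();

    // What initTransient() does to reverse it:
    PipelineOptions restored = new SerializablePipelineOptions(serialized).get();
    System.out.println(restored.getJobName()); // prints "round-trip-demo"
  }
}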
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
The class CoderTypeSerializerTest, method testWriteAndReadConfigSnapshot.
private void testWriteAndReadConfigSnapshot(Coder<String> coder) throws IOException {
  CoderTypeSerializer<String> serializer =
      new CoderTypeSerializer<>(
          coder, new SerializablePipelineOptions(PipelineOptionsFactory.create()));

  TypeSerializerSnapshot writtenSnapshot = serializer.snapshotConfiguration();
  ComparatorTestBase.TestOutputView outView = new ComparatorTestBase.TestOutputView();
  writtenSnapshot.writeSnapshot(outView);

  TypeSerializerSnapshot readSnapshot = new CoderTypeSerializer.LegacySnapshot();
  readSnapshot.readSnapshot(
      writtenSnapshot.getCurrentVersion(), outView.getInputView(), getClass().getClassLoader());

  assertThat(readSnapshot.restoreSerializer(), is(serializer));
}
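The helper is parameterized over any Coder<String>, so the enclosing test class can exercise several coders through it. A plausible caller, with a hypothetical test-method name (StringUtf8Coder itself is a real Beam coder):

// Hypothetical caller of the helper above; the real test class may
// exercise different coders.
@Test
public void testWriteAndReadConfigSnapshotWithStringUtf8Coder() throws IOException {
  testWriteAndReadConfigSnapshot(StringUtf8Coder.of());
}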
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in project beam by apache.
The class ExecutableStageDoFnOperatorTest, method sdkErrorsSurfaceOnClose.
@Test
public void sdkErrorsSurfaceOnClose() throws Exception {
  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainOutput,
          VoidCoder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
      new OneInputStreamOperatorTestHarness<>(operator);
  testHarness.open();

  @SuppressWarnings("unchecked")
  RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
  when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);

  @SuppressWarnings("unchecked")
  FnDataReceiver<WindowedValue<?>> receiver = Mockito.mock(FnDataReceiver.class);
  when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));

  Exception expected = new RuntimeException(new Exception());
  doThrow(expected).when(bundle).close();
  thrown.expectCause(is(expected));

  operator.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(0)));
  testHarness.close();
}
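The thrown field consulted by expectCause is not declared in this snippet; presumably it is the standard JUnit 4 ExpectedException rule, declared along these lines:

// Assumption: `thrown` is the usual JUnit 4 rule for asserting exceptions.
@Rule public ExpectedException thrown = ExpectedException.none();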