Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
The class DoFnFunction, method prepareSerialization.
/**
 * Prepares this DoFnFunction so it can be serialized properly. Non-serializable state is
 * converted into protobuf messages, byte arrays, strings and id-keyed maps, which are later
 * converted back into the proper classes during deserialization.
 *
 * <p>The following fields are populated: the pipeline options string, the translated DoFn and
 * its payload bytes, the input coder bytes, the main windowing strategy proto and its bytes, the
 * per-output coder bytes, the per-side-input windowing strategy bytes, the side output ids and
 * the output index map. Every map is keyed by the id of the corresponding {@code TupleTag}.
 *
 * <p>NOTE(review): an {@link IOException} raised while translating is only logged, so fields
 * assigned after the failure point keep whatever value they had before - confirm that callers
 * tolerate a partially prepared instance.
 */
private void prepareSerialization() {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(
          pipelineOptions.as(PortablePipelineOptions.class)));

  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
  doFnwithEx =
      ParDoTranslation.translateDoFn(
          this.doFn, mainOutput, sideInputMapping, doFnSchemaInformation, components);
  doFnwithExBytes = doFnwithEx.getPayload().toByteArray();
  outputCodersBytes = new HashMap<>();

  try {
    coderBytes = SerializableUtils.serializeToByteArray(inputCoder);

    // Main windowing strategy: keep both the proto and its serialized form.
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
    windowBytes = windowStrategyProto.toByteArray();

    for (Map.Entry<TupleTag<?>, Coder<?>> entry : outputCoders.entrySet()) {
      outputCodersBytes.put(
          entry.getKey().getId(), SerializableUtils.serializeToByteArray(entry.getValue()));
    }

    sideInputBytes = new HashMap<>();
    for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> entry : sideInputs.entrySet()) {
      // Translate inline instead of going through the windowStrategyProto field, so that the
      // field keeps describing the main windowing strategy (the one encoded in windowBytes).
      sideInputBytes.put(
          entry.getKey().getId(),
          WindowingStrategyTranslation.toMessageProto(entry.getValue(), components)
              .toByteArray());
    }

    serializedSideOutputs = new ArrayList<>();
    for (TupleTag<?> sideOutput : sideOutputs) {
      serializedSideOutputs.add(sideOutput.getId());
    }

    serializedOutputMap = new HashMap<>();
    for (Map.Entry<TupleTag<?>, Integer> entry : outputMap.entrySet()) {
      serializedOutputMap.put(entry.getKey().getId(), entry.getValue());
    }
  } catch (IOException e) {
    LOG.info(e.getMessage());
  }
}
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
The class GroupByWindowFunction, method initTransient.
/**
 * Restores the transient windowing state from {@code windowBytes}: the bytes are parsed back
 * into the windowing strategy proto, which is then translated into the windowing strategy.
 *
 * <p>Does nothing when the function is already marked as initialized. A parse failure is logged
 * and the function is still marked as initialized afterwards.
 */
private void initTransient() {
  if (!isInitialized) {
    try {
      windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
      // The strategy is rehydrated against a fresh, empty set of SDK components.
      RehydratedComponents rehydrated =
          RehydratedComponents.forComponents(SdkComponents.create().toComponents());
      windowingStrategy =
          (WindowingStrategy<?, W>)
              WindowingStrategyTranslation.fromProto(
                  windowStrategyProto.getWindowingStrategy(), rehydrated);
    } catch (InvalidProtocolBufferException e) {
      LOG.info(e.getMessage());
    }
    this.isInitialized = true;
  }
}
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
The class ExpansionService, method expand.
/**
 * Expands the transform carried by {@code request} and returns the expanded transform together
 * with the components it depends on.
 *
 * <p>Steps: create a pipeline, rehydrate the request's input PCollections into it, look up the
 * transform provider for the transform's URN (Java class lookup or a registered transform),
 * apply the provider, translate the resulting pipeline to proto, and pick the single root
 * transform that was not already present in the request's components.
 *
 * @param request the expansion request holding the transform, its components and the namespace
 *     used as prefix for newly generated ids
 * @return a response containing the expanded transform (named after its id, with outputs mapped
 *     to the registered PCollection ids), the remaining components and the pipeline requirements
 * @throws UnsupportedOperationException if no transform provider is registered for the URN
 * @throws RuntimeException wrapping an {@link IOException} raised while rehydrating an input or
 *     registering an output PCollection
 */
@VisibleForTesting
/*package*/
ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
  RunnerApi.PTransform requestedTransform = request.getTransform();
  String urn = requestedTransform.getSpec().getUrn();
  LOG.info("Expanding '{}' with URN '{}'", requestedTransform.getUniqueName(), urn);
  LOG.debug("Full transform: {}", requestedTransform);

  Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
  Pipeline pipeline = createPipeline();

  boolean isUseDeprecatedRead =
      ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read")
          || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
  if (!isUseDeprecatedRead) {
    ExperimentalOptions.addExperiment(
        pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10670): Remove this when we address performance issue.
    ExperimentalOptions.addExperiment(
        pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
  } else {
    LOG.warn(
        "Using use_deprecated_read in portable runners is runner-dependent. The "
            + "ExpansionService will respect that, but if your runner does not have support for "
            + "native Read transform, your Pipeline will fail during Pipeline submission.");
  }

  // Rehydrate every input PCollection of the requested transform into the new pipeline.
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
  Map<String, PCollection<?>> inputs =
      requestedTransform.getInputsMap().entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  input -> {
                    try {
                      return rehydratedComponents.getPCollection(input.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));

  TransformProvider transformProvider;
  if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
    AllowList allowList =
        pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
    assert allowList != null;
    transformProvider = new JavaClassLookupTransformProvider(allowList);
  } else {
    transformProvider = getRegisteredTransforms().get(urn);
    if (transformProvider == null) {
      throw new UnsupportedOperationException("Unknown urn: " + urn);
    }
  }

  List<String> classpathResources =
      transformProvider.getDependencies(requestedTransform.getSpec(), pipeline.getOptions());
  pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);

  Map<String, PCollection<?>> outputs =
      transformProvider.apply(
          pipeline, requestedTransform.getUniqueName(), requestedTransform.getSpec(), inputs);

  // Needed to find which transform was new...
  SdkComponents sdkComponents =
      rehydratedComponents
          .getSdkComponents(Collections.emptyList())
          .withNewIdPrefix(request.getNamespace());
  sdkComponents.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(
          pipeline.getOptions().as(PortablePipelineOptions.class)));
  Map<String, String> outputMap =
      outputs.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  output -> {
                    try {
                      return sdkComponents.registerPCollection(output.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));

  if (isUseDeprecatedRead) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
  // Exactly one root transform must be new relative to the request's components.
  String expandedTransformId =
      Iterables.getOnlyElement(
          pipelineProto.getRootTransformIdsList().stream()
              .filter(id -> !existingTransformIds.contains(id))
              .collect(Collectors.toList()));
  RunnerApi.Components components = pipelineProto.getComponents();
  RunnerApi.PTransform expandedTransform =
      components
          .getTransformsOrThrow(expandedTransformId)
          .toBuilder()
          .setUniqueName(expandedTransformId)
          .clearOutputs()
          .putAllOutputs(outputMap)
          .build();
  LOG.debug("Expanded to {}", expandedTransform);

  // The expanded transform is returned on its own, so it is removed from the components.
  return ExpansionApi.ExpansionResponse.newBuilder()
      .setComponents(components.toBuilder().removeTransforms(expandedTransformId))
      .setTransform(expandedTransform)
      .addAllRequirements(pipelineProto.getRequirementsList())
      .build();
}
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
The class DataflowPipelineTranslator, method serializeWindowingStrategy.
/**
 * Translates {@code windowingStrategy} to its message proto and returns the proto's bytes.
 *
 * <p>The SDK components used for the translation get a Docker environment registered, built from
 * the container image that {@code DataflowRunner.getContainerImageForJob} reports for
 * {@code options}.
 *
 * @param windowingStrategy the windowing strategy to serialize
 * @param options pipeline options, viewed as {@code DataflowPipelineOptions}
 * @return the serialized message proto of the windowing strategy
 * @throws RuntimeException wrapping any exception raised while building the environment or
 *     translating the strategy
 */
private static byte[] serializeWindowingStrategy(
    WindowingStrategy<?, ?> windowingStrategy, PipelineOptions options) {
  try {
    SdkComponents translationComponents = SdkComponents.create();
    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
    String containerImage = DataflowRunner.getContainerImageForJob(dataflowOptions);
    translationComponents.registerEnvironment(
        Environments.createDockerEnvironment(containerImage));
    return WindowingStrategyTranslation.toMessageProto(windowingStrategy, translationComponents)
        .toByteArray();
  } catch (Exception e) {
    String message =
        String.format("Unable to format windowing strategy %s as bytes", windowingStrategy);
    throw new RuntimeException(message, e);
  }
}
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
The class WindowUtilsTest, method testInstantiateWindowedCoder.
/**
 * Registers a bounded, globally windowed PCollection with a KV coder and checks that
 * {@code WindowUtils.instantiateWindowedCoder} yields, for that PCollection's id, a coder whose
 * value coder equals the original KV coder.
 */
@Test
public void testInstantiateWindowedCoder() throws IOException {
  Coder<KV<Long, String>> kvCoder = KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of());

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  PCollection<KV<Long, String>> registered =
      PCollection.createPrimitiveOutputInternal(
              Pipeline.create(),
              WindowingStrategy.globalDefault(),
              PCollection.IsBounded.BOUNDED,
              kvCoder)
          .setName("name");
  String registeredId = sdkComponents.registerPCollection(registered);

  assertEquals(
      kvCoder,
      WindowUtils.instantiateWindowedCoder(registeredId, sdkComponents.toComponents())
          .getValueCoder());
}
Aggregations