Use of org.apache.beam.runners.dataflow.worker.counters.NameContext in project beam by apache.
The class MeanByteCountMonitoringInfoToCounterUpdateTransformerTest, method testTransformReturnsNullIfMonitoringInfoWithUnknownPCollectionLabelPresent.
@Test
public void testTransformReturnsNullIfMonitoringInfoWithUnknownPCollectionLabelPresent() {
  Map<String, NameContext> pcollectionNameMapping = new HashMap<>();
  MonitoringInfo monitoringInfo =
      MonitoringInfo.newBuilder()
          .setUrn(Urns.SAMPLED_BYTE_SIZE)
          .setType(TypeUrns.DISTRIBUTION_INT64_TYPE)
          .putLabels(MonitoringInfoConstants.Labels.PCOLLECTION, "anyValue")
          .build();
  MeanByteCountMonitoringInfoToCounterUpdateTransformer testObject =
      new MeanByteCountMonitoringInfoToCounterUpdateTransformer(mockSpecValidator, pcollectionNameMapping);
  when(mockSpecValidator.validate(any())).thenReturn(Optional.empty());
  assertNull(testObject.transform(monitoringInfo));
}
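For contrast, a minimal sketch of the passing case: if the mapping contains the value of the PCOLLECTION label, the transformer can resolve a NameContext and would be expected to produce a CounterUpdate instead of null. The four names below are hypothetical; only the NameContext.create(stageName, originalName, systemName, userName) signature is taken from the other examples on this page.

  Map<String, NameContext> pcollectionNameMapping = new HashMap<>();
  // "anyValue" matches the label value used in the test above; the four name strings are made up.
  pcollectionNameMapping.put("anyValue", NameContext.create("stageName", "originalName", "systemName", "userName"));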
Use of org.apache.beam.runners.dataflow.worker.counters.NameContext in project beam by apache.
The class CreateExecutableStageNodeFunction, method apply.
@Override
public Node apply(MutableNetwork<Node, Edge> input) {
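  // Verify that the network contains only node types this translation understands.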
  for (Node node : input.nodes()) {
    if (node instanceof RemoteGrpcPortNode || node instanceof ParallelInstructionNode || node instanceof InstructionOutputNode) {
      continue;
    }
    throw new IllegalArgumentException(String.format("Network contains unknown type of node: %s", input));
  }
  // Fix all non-output nodes to have named edges.
  for (Node node : input.nodes()) {
    if (node instanceof InstructionOutputNode) {
      continue;
    }
    for (Node successor : input.successors(node)) {
      for (Edge edge : input.edgesConnecting(node, successor)) {
        if (edge instanceof DefaultEdge) {
          input.removeEdge(edge);
          input.addEdge(node, successor, MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
        }
      }
    }
  }
  RunnerApi.Components.Builder componentsBuilder = RunnerApi.Components.newBuilder();
  componentsBuilder.mergeFrom(this.pipeline.getComponents());
  // Default to the Java environment if the pipeline doesn't have an environment specified.
  if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
    String envId = Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn() + idGenerator.getId();
    componentsBuilder.putEnvironments(envId, Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
  }
  // By default, use GlobalWindow for all languages.
  // For Java, if there is an IntervalWindowCoder, then use FixedWindows instead.
  // TODO: should get the real WindowingStrategy from the pipeline proto.
  String globalWindowingStrategyId = "generatedGlobalWindowingStrategy" + idGenerator.getId();
  String intervalWindowEncodingWindowingStrategyId = "generatedIntervalWindowEncodingWindowingStrategy" + idGenerator.getId();
  SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents(), null);
  try {
    registerWindowingStrategy(globalWindowingStrategyId, WindowingStrategy.globalDefault(), componentsBuilder, sdkComponents);
    registerWindowingStrategy(intervalWindowEncodingWindowingStrategyId, WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))), componentsBuilder, sdkComponents);
  } catch (IOException exc) {
    throw new RuntimeException("Could not convert default windowing strategy to proto", exc);
  }
  Map<Node, String> nodesToPCollections = new HashMap<>();
  ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();
  ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos = ImmutableMap.builder();
  ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews = ImmutableMap.builder();
  // A field of ExecutableStage: the PCollections that go to the worker side.
  Set<PCollectionNode> executableStageOutputs = new HashSet<>();
  // A field of ExecutableStage: the PCollections that go to the runner side.
  Set<PCollectionNode> executableStageInputs = new HashSet<>();
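  // Translate each InstructionOutputNode into a PCollection proto, deriving its coder and
  // windowing strategy from the instruction output's codec.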
  for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
    InstructionOutput instructionOutput = node.getInstructionOutput();
    String coderId = "generatedCoder" + idGenerator.getId();
    String windowingStrategyId;
    try (ByteString.Output output = ByteString.newOutput()) {
      try {
        Coder<?> javaCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
        Coder<?> elementCoder = ((WindowedValueCoder<?>) javaCoder).getValueCoder();
        sdkComponents.registerCoder(elementCoder);
        RunnerApi.Coder coderProto = CoderTranslation.toProto(elementCoder, sdkComponents);
        componentsBuilder.putCoders(coderId, coderProto);
        // For now, the Dataflow runner harness only deals with FixedWindows.
        if (javaCoder instanceof FullWindowedValueCoder) {
          FullWindowedValueCoder<?> windowedValueCoder = (FullWindowedValueCoder<?>) javaCoder;
          Coder<?> windowCoder = windowedValueCoder.getWindowCoder();
          if (windowCoder instanceof IntervalWindowCoder) {
            windowingStrategyId = intervalWindowEncodingWindowingStrategyId;
          } else if (windowCoder instanceof GlobalWindow.Coder) {
            windowingStrategyId = globalWindowingStrategyId;
          } else {
            throw new UnsupportedOperationException(String.format("Dataflow portable runner harness doesn't support windowing with %s", windowCoder));
          }
        } else {
          throw new UnsupportedOperationException("Dataflow portable runner harness only supports FullWindowedValueCoder");
        }
      } catch (IOException e) {
        throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
      } catch (Exception e) {
        // The coder was probably not a Java coder; pass the raw cloud object through instead.
        OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
        componentsBuilder.putCoders(coderId, RunnerApi.Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString())).build());
        // For a non-Java coder, assume GlobalWindows by default.
        // TODO(BEAM-6231): Actually discover the right windowing strategy.
        windowingStrategyId = globalWindowingStrategyId;
      }
    } catch (IOException e) {
      throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
    }
    // TODO(BEAM-6275): Set correct IsBounded on generated PCollections.
    String pcollectionId = node.getPcollectionId();
    RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder().setCoderId(coderId).setWindowingStrategyId(windowingStrategyId).setIsBounded(RunnerApi.IsBounded.Enum.BOUNDED).build();
    nodesToPCollections.put(node, pcollectionId);
    componentsBuilder.putPcollections(pcollectionId, pCollection);
    // Register this PCollection as an executable stage input or output when
    // "use_executable_stage_bundle_execution" is set.
    if (isExecutableStageOutputPCollection(input, node)) {
      executableStageOutputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
    }
    if (isExecutableStageInputPCollection(input, node)) {
      executableStageInputs.add(PipelineNode.pCollection(pcollectionId, pCollection));
    }
  }
  componentsBuilder.putAllCoders(sdkComponents.toComponents().getCodersMap());
  Set<PTransformNode> executableStageTransforms = new HashSet<>();
  Set<TimerReference> executableStageTimers = new HashSet<>();
  List<UserStateId> userStateIds = new ArrayList<>();
  Set<SideInputReference> executableStageSideInputs = new HashSet<>();
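  // Translate each ParallelInstructionNode into a PTransform proto, collecting its timers,
  // user state ids, and side input references along the way.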
  for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
    ImmutableMap.Builder<String, PCollectionNode> sideInputIds = ImmutableMap.builder();
    ParallelInstruction parallelInstruction = node.getParallelInstruction();
    String ptransformId = "generatedPtransform" + idGenerator.getId();
    ptransformIdToNameContexts.put(ptransformId, NameContext.create(null, parallelInstruction.getOriginalName(), parallelInstruction.getSystemName(), parallelInstruction.getName()));
    RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
    RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();
    List<String> timerIds = new ArrayList<>();
    if (parallelInstruction.getParDo() != null) {
      ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
      CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
      String userFnClassName = userFnSpec.getClassName();
      if (userFnClassName.equals("CombineValuesFn") || userFnClassName.equals("KeyedCombineFn")) {
        transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
        ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
      } else {
        String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
        RunnerApi.PTransform parDoPTransform = pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);
        // TODO: only the non-null branch should exist; kept for ease of migration only.
        if (parDoPTransform != null) {
          checkArgument(parDoPTransform.getSpec().getUrn().equals(PTransformTranslation.PAR_DO_TRANSFORM_URN), "Found transform \"%s\" for ParallelDo instruction, but that transform had unexpected URN \"%s\" (expected \"%s\")", parDoPTransformId, parDoPTransform.getSpec().getUrn(), PTransformTranslation.PAR_DO_TRANSFORM_URN);
          RunnerApi.ParDoPayload parDoPayload;
          try {
            parDoPayload = RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
          } catch (InvalidProtocolBufferException exc) {
            throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
          }
          // Collect the pipeline's user timers and user state.
          for (Map.Entry<String, RunnerApi.TimerFamilySpec> entry : parDoPayload.getTimerFamilySpecsMap().entrySet()) {
            timerIds.add(entry.getKey());
          }
          for (Map.Entry<String, RunnerApi.StateSpec> entry : parDoPayload.getStateSpecsMap().entrySet()) {
            UserStateId.Builder builder = UserStateId.newBuilder();
            builder.setTransformId(parDoPTransformId);
            builder.setLocalName(entry.getKey());
            userStateIds.add(builder.build());
          }
          // Collect the side input PCollections needed to build the executableStageSideInputs set.
          for (String sideInputTag : parDoPayload.getSideInputsMap().keySet()) {
            String sideInputPCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
            RunnerApi.PCollection sideInputPCollection = pipeline.getComponents().getPcollectionsOrThrow(sideInputPCollectionId);
            pTransform.putInputs(sideInputTag, sideInputPCollectionId);
            PCollectionNode pCollectionNode = PipelineNode.pCollection(sideInputPCollectionId, sideInputPCollection);
            sideInputIds.put(sideInputTag, pCollectionNode);
          }
          // Build the map from ptransformId to PCollectionViews, which is required to construct
          // an ExecutableStageNode.
          ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
          for (Map.Entry<String, RunnerApi.SideInput> sideInputEntry : parDoPayload.getSideInputsMap().entrySet()) {
            pcollectionViews.add(RegisterNodeFunction.transformSideInputForRunner(pipeline, parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
          }
          ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());
          transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString());
        } else {
          // Legacy path - the bytes are, in effect, the FunctionSpec's payload field, and
          // SDKs expect them in the PTransform's payload field.
          byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
          transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(userFnBytes));
        }
        if (parDoInstruction.getSideInputs() != null) {
          ptransformIdToSideInputInfos.put(ptransformId, forSideInputInfos(parDoInstruction.getSideInputs(), true));
        }
      }
    } else if (parallelInstruction.getRead() != null) {
      ReadInstruction readInstruction = parallelInstruction.getRead();
      CloudObject sourceSpec = CloudObject.fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
      // TODO: Need to plumb through the SDK-specific function spec.
      transformSpec.setUrn(JAVA_SOURCE_URN);
      try {
        byte[] serializedSource = Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
        ByteString sourceByteString = ByteString.copyFrom(serializedSource);
        transformSpec.setPayload(sourceByteString);
      } catch (Exception e) {
        throw new IllegalArgumentException(String.format("Unable to process Read %s", parallelInstruction), e);
      }
    } else if (parallelInstruction.getFlatten() != null) {
      transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
    } else {
      throw new IllegalArgumentException(String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
    }
    // Wire in each PCollection produced by a predecessor node as an input; for a ParDo, this
    // predecessor PCollection is called the "main input".
    for (Node predecessorOutput : input.predecessors(node)) {
      pTransform.putInputs("generatedInput" + idGenerator.getId(), nodesToPCollections.get(predecessorOutput));
    }
    for (Edge edge : input.outEdges(node)) {
      Node nodeOutput = input.incidentNodes(edge).target();
      MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
      pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(), nodesToPCollections.get(nodeOutput));
    }
    pTransform.setSpec(transformSpec);
    PTransformNode pTransformNode = PipelineNode.pTransform(ptransformId, pTransform.build());
    executableStageTransforms.add(pTransformNode);
    for (String timerId : timerIds) {
      executableStageTimers.add(TimerReference.of(pTransformNode, timerId));
    }
    ImmutableMap<String, PCollectionNode> sideInputIdToPCollectionNodes = sideInputIds.build();
    for (String sideInputTag : sideInputIdToPCollectionNodes.keySet()) {
      SideInputReference sideInputReference = SideInputReference.of(pTransformNode, sideInputTag, sideInputIdToPCollectionNodes.get(sideInputTag));
      executableStageSideInputs.add(sideInputReference);
    }
  }
  if (executableStageInputs.size() != 1) {
    throw new UnsupportedOperationException("ExecutableStage only supports one input PCollection");
  }
  PCollectionNode executableInput = executableStageInputs.iterator().next();
  RunnerApi.Components executableStageComponents = componentsBuilder.build();
  // Get the Environment from the PTransforms; otherwise, use JAVA_SDK_HARNESS_ENVIRONMENT as the default.
  Environment executableStageEnv = getEnvironmentFromPTransform(executableStageComponents, executableStageTransforms);
  if (executableStageEnv == null) {
    executableStageEnv = Environments.JAVA_SDK_HARNESS_ENVIRONMENT;
  }
  Set<UserStateReference> executableStageUserStateReference = new HashSet<>();
  for (UserStateId userStateId : userStateIds) {
    executableStageUserStateReference.add(UserStateReference.fromUserStateId(userStateId, executableStageComponents));
  }
  ExecutableStage executableStage = ImmutableExecutableStage.ofFullComponents(executableStageComponents, executableStageEnv, executableInput, executableStageSideInputs, executableStageUserStateReference, executableStageTimers, executableStageTransforms, executableStageOutputs, DEFAULT_WIRE_CODER_SETTINGS);
  return ExecutableStageNode.create(executableStage, ptransformIdToNameContexts.build(), ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build());
}
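Note the contrast with RegisterNodeFunction below: CreateExecutableStageNodeFunction materializes a complete ExecutableStage (components, environment, input, side inputs, user state, timers, transforms, and outputs), while RegisterNodeFunction instead assembles a ProcessBundleDescriptor and wraps it in a RegisterRequest for the SDK harness.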
Use of org.apache.beam.runners.dataflow.worker.counters.NameContext in project beam by apache.
The class RegisterNodeFunction, method apply.
@Override
public Node apply(MutableNetwork<Node, Edge> input) {
  for (Node node : input.nodes()) {
    if (node instanceof RemoteGrpcPortNode || node instanceof ParallelInstructionNode || node instanceof InstructionOutputNode) {
      continue;
    }
    throw new IllegalArgumentException(String.format("Network contains unknown type of node: %s", input));
  }
  // Fix all non-output nodes to have named edges.
  for (Node node : input.nodes()) {
    if (node instanceof InstructionOutputNode) {
      continue;
    }
    for (Node successor : input.successors(node)) {
      for (Edge edge : input.edgesConnecting(node, successor)) {
        if (edge instanceof DefaultEdge) {
          input.removeEdge(edge);
          input.addEdge(node, successor, MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
        }
      }
    }
  }
  // We start off by replacing all edges within the graph with edges that have the named
  // outputs from the predecessor step. For ParallelInstruction Source nodes and RemoteGrpcPort
  // nodes this is a generated port id. All ParDoInstructions will have already been assigned
  // named outputs by the edge fix-up above.
  ProcessBundleDescriptor.Builder processBundleDescriptor = ProcessBundleDescriptor.newBuilder().setId(idGenerator.getId()).setStateApiServiceDescriptor(stateApiServiceDescriptor);
  // For the intermediate PCollections we fabricate, we make a bogus WindowingStrategy.
  // TODO: create a correct windowing strategy, including coders and environment.
  SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents(), null);
  // Default to the Java environment if the pipeline doesn't have an environment specified.
  if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
  }
  String fakeWindowingStrategyId = "fakeWindowingStrategy" + idGenerator.getId();
  try {
    RunnerApi.MessageWithComponents fakeWindowingStrategyProto = WindowingStrategyTranslation.toMessageProto(WindowingStrategy.globalDefault(), sdkComponents);
    processBundleDescriptor.putWindowingStrategies(fakeWindowingStrategyId, fakeWindowingStrategyProto.getWindowingStrategy()).putAllCoders(fakeWindowingStrategyProto.getComponents().getCodersMap()).putAllEnvironments(fakeWindowingStrategyProto.getComponents().getEnvironmentsMap());
  } catch (IOException exc) {
    throw new RuntimeException("Could not convert default windowing strategy to proto", exc);
  }
  Map<Node, String> nodesToPCollections = new HashMap<>();
  ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();
  ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos = ImmutableMap.builder();
  ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews = ImmutableMap.builder();
  ImmutableMap.Builder<String, NameContext> pcollectionIdToNameContexts = ImmutableMap.builder();
  ImmutableMap.Builder<InstructionOutputNode, String> instructionOutputNodeToCoderIdBuilder = ImmutableMap.builder();
  // For each instruction output node:
  // 1. Generate a new coder id and register the coder with the ProcessBundleDescriptor.
  // 2. Generate a new PCollection id and register it with the ProcessBundleDescriptor.
  for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
    InstructionOutput instructionOutput = node.getInstructionOutput();
    String coderId = "generatedCoder" + idGenerator.getId();
    instructionOutputNodeToCoderIdBuilder.put(node, coderId);
    try (ByteString.Output output = ByteString.newOutput()) {
      try {
        Coder<?> javaCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
        sdkComponents.registerCoder(javaCoder);
        RunnerApi.Coder coderProto = CoderTranslation.toProto(javaCoder, sdkComponents);
        processBundleDescriptor.putCoders(coderId, coderProto);
      } catch (IOException e) {
        throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
      } catch (Exception e) {
        // The coder was probably not a Java coder; pass the raw cloud object through instead.
        OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
        processBundleDescriptor.putCoders(coderId, RunnerApi.Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString())).build());
      }
    } catch (IOException e) {
      throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
    }
    // Generate a new PCollection id and map it to the relevant node.
    // It will later be used to fill in the PTransform inputs/outputs information.
    String pcollectionId = "generatedPcollection" + idGenerator.getId();
    processBundleDescriptor.putPcollections(pcollectionId, RunnerApi.PCollection.newBuilder().setCoderId(coderId).setWindowingStrategyId(fakeWindowingStrategyId).build());
    nodesToPCollections.put(node, pcollectionId);
    pcollectionIdToNameContexts.put(pcollectionId, NameContext.create(null, instructionOutput.getOriginalName(), instructionOutput.getSystemName(), instructionOutput.getName()));
  }
  processBundleDescriptor.putAllCoders(sdkComponents.toComponents().getCodersMap());
  Map<InstructionOutputNode, String> instructionOutputNodeToCoderIdMap = instructionOutputNodeToCoderIdBuilder.build();
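  // Translate each ParallelInstructionNode into a PTransform registered on the ProcessBundleDescriptor.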
  for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
    ParallelInstruction parallelInstruction = node.getParallelInstruction();
    String ptransformId = "generatedPtransform" + idGenerator.getId();
    ptransformIdToNameContexts.put(ptransformId, NameContext.create(null, parallelInstruction.getOriginalName(), parallelInstruction.getSystemName(), parallelInstruction.getName()));
    RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
    RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();
    if (parallelInstruction.getParDo() != null) {
      ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
      CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
      String userFnClassName = userFnSpec.getClassName();
      if ("CombineValuesFn".equals(userFnClassName) || "KeyedCombineFn".equals(userFnClassName)) {
        transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
        ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
      } else {
        String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
        RunnerApi.PTransform parDoPTransform = pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);
        // TODO: only the non-null branch should exist; kept for ease of migration only.
        if (parDoPTransform != null) {
          checkArgument(parDoPTransform.getSpec().getUrn().equals(PTransformTranslation.PAR_DO_TRANSFORM_URN), "Found transform \"%s\" for ParallelDo instruction, but that transform had unexpected URN \"%s\" (expected \"%s\")", parDoPTransformId, parDoPTransform.getSpec().getUrn(), PTransformTranslation.PAR_DO_TRANSFORM_URN);
          RunnerApi.ParDoPayload parDoPayload;
          try {
            parDoPayload = RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
          } catch (InvalidProtocolBufferException exc) {
            throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
          }
          ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
          for (Map.Entry<String, SideInput> sideInputEntry : parDoPayload.getSideInputsMap().entrySet()) {
            pcollectionViews.add(transformSideInputForRunner(pipeline, parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
            transformSideInputForSdk(pipeline, parDoPTransform, sideInputEntry.getKey(), processBundleDescriptor, pTransform);
          }
          ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());
          transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString());
        } else {
          // Legacy path - the bytes are, in effect, the FunctionSpec's payload field, and
          // SDKs expect them in the PTransform's payload field.
          byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
          transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(userFnBytes));
        }
        // Add side input information for batch pipelines.
        if (parDoInstruction.getSideInputs() != null) {
          ptransformIdToSideInputInfos.put(ptransformId, forSideInputInfos(parDoInstruction.getSideInputs(), true));
        }
      }
    } else if (parallelInstruction.getRead() != null) {
      ReadInstruction readInstruction = parallelInstruction.getRead();
      CloudObject sourceSpec = CloudObject.fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
      // TODO: Need to plumb through the SDK-specific function spec.
      transformSpec.setUrn(JAVA_SOURCE_URN);
      try {
        byte[] serializedSource = Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
        ByteString sourceByteString = ByteString.copyFrom(serializedSource);
        transformSpec.setPayload(sourceByteString);
      } catch (Exception e) {
        throw new IllegalArgumentException(String.format("Unable to process Read %s", parallelInstruction), e);
      }
    } else if (parallelInstruction.getFlatten() != null) {
      transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
    } else {
      throw new IllegalArgumentException(String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
    }
    for (Node predecessorOutput : input.predecessors(node)) {
      pTransform.putInputs("generatedInput" + idGenerator.getId(), nodesToPCollections.get(predecessorOutput));
    }
    for (Edge edge : input.outEdges(node)) {
      Node nodeOutput = input.incidentNodes(edge).target();
      MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
      pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(), nodesToPCollections.get(nodeOutput));
    }
    pTransform.setSpec(transformSpec);
    processBundleDescriptor.putTransforms(ptransformId, pTransform.build());
  }
  // Add the PTransforms representing the remote gRPC nodes.
  for (RemoteGrpcPortNode node : Iterables.filter(input.nodes(), RemoteGrpcPortNode.class)) {
    RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
    Set<Node> predecessors = input.predecessors(node);
    Set<Node> successors = input.successors(node);
    if (predecessors.isEmpty() && !successors.isEmpty()) {
      Node instructionOutputNode = Iterables.getOnlyElement(successors);
      pTransform.putOutputs("generatedOutput" + idGenerator.getId(), nodesToPCollections.get(instructionOutputNode));
      pTransform.setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).setPayload(node.getRemoteGrpcPort().toBuilder().setCoderId(instructionOutputNodeToCoderIdMap.get(instructionOutputNode)).build().toByteString()).build());
    } else if (!predecessors.isEmpty() && successors.isEmpty()) {
      Node instructionOutputNode = Iterables.getOnlyElement(predecessors);
      pTransform.putInputs("generatedInput" + idGenerator.getId(), nodesToPCollections.get(instructionOutputNode));
      pTransform.setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_OUTPUT_URN).setPayload(node.getRemoteGrpcPort().toBuilder().setCoderId(instructionOutputNodeToCoderIdMap.get(instructionOutputNode)).build().toByteString()).build());
    } else {
      throw new IllegalStateException("Expected either one input OR one output InstructionOutputNode for this RemoteGrpcPortNode");
    }
    processBundleDescriptor.putTransforms(node.getPrimitiveTransformId(), pTransform.build());
  }
  return RegisterRequestNode.create(RegisterRequest.newBuilder().addProcessBundleDescriptor(processBundleDescriptor).build(), ptransformIdToNameContexts.build(), ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build(), pcollectionIdToNameContexts.build());
}
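Note that the NameContexts registered here are partial: they are created with a null stage name (NameContext.create(null, ...) above). BeamFnMapTaskExecutorFactory.createOperationTransformForRegisterFnNodes, shown below, completes them with the actual stage name.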
Use of org.apache.beam.runners.dataflow.worker.counters.NameContext in project beam by apache.
The class DataflowWorkerLoggingHandler, method publish.
public synchronized void publish(DataflowExecutionState currentExecutionState, LogRecord record) {
  if (!isLoggable(record)) {
    return;
  }
  rolloverOutputStreamIfNeeded();
  try {
    // Generating a JSON map like:
    // {"timestamp": {"seconds": 1435835832, "nanos": 123456789}, ... "message": "hello"}
    generator.writeStartObject();
    // Write the timestamp.
    generator.writeFieldName("timestamp");
    generator.writeStartObject();
    generator.writeNumberField("seconds", record.getMillis() / 1000);
    generator.writeNumberField("nanos", (record.getMillis() % 1000) * 1000000);
    generator.writeEndObject();
    // Write the severity.
    generator.writeObjectField("severity", MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), record.getLevel().getName()));
    // Write the other labels.
    writeIfNotEmpty("message", formatter.formatMessage(record));
    writeIfNotEmpty("thread", String.valueOf(record.getThreadID()));
    writeIfNotEmpty("job", DataflowWorkerLoggingMDC.getJobId());
    writeIfNotEmpty("stage", DataflowWorkerLoggingMDC.getStageName());
    if (currentExecutionState != null) {
      NameContext nameContext = currentExecutionState.getStepName();
      if (nameContext != null) {
        writeIfNotEmpty("step", nameContext.userName());
      }
    }
    writeIfNotEmpty("worker", DataflowWorkerLoggingMDC.getWorkerId());
    writeIfNotEmpty("work", DataflowWorkerLoggingMDC.getWorkId());
    writeIfNotEmpty("logger", record.getLoggerName());
    writeIfNotEmpty("exception", formatException(record.getThrown()));
    generator.writeEndObject();
    generator.writeRaw(System.lineSeparator());
  } catch (IOException | RuntimeException e) {
    reportFailure("Unable to publish", e, ErrorManager.WRITE_FAILURE);
  }
  // This implementation is based on that of java.util.logging.FileHandler, which flushes in a
  // synchronized context like this. Unfortunately the maximum throughput for generating log
  // entries will be the inverse of the flush latency. That could be as little as one hundred
  // log entries per second on some systems. For higher throughput this should be changed to
  // batch publish operations while writes and flushes are in flight on a different thread.
  flush();
}
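Given the fields written above, each published record is a single JSON line. The shape below is a hedged illustration (all values are made up; "step" is the NameContext's userName, and "exception" is omitted when there is no throwable):

  {"timestamp":{"seconds":1435835832,"nanos":123000000},"severity":"INFO","message":"hello","thread":"22","job":"jobId","stage":"stageName","step":"userStepName","worker":"workerId","work":"workId","logger":"org.example.SomeLogger"}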
Use of org.apache.beam.runners.dataflow.worker.counters.NameContext in project beam by apache.
The class BeamFnMapTaskExecutorFactory, method createOperationTransformForRegisterFnNodes.
private Function<Node, Node> createOperationTransformForRegisterFnNodes(final IdGenerator idGenerator, final InstructionRequestHandler instructionRequestHandler, final StateDelegator beamFnStateDelegator, final String stageName, final DataflowExecutionContext<?> executionContext) {
  return new TypeSafeNodeFunction<RegisterRequestNode>(RegisterRequestNode.class) {
    @Override
    public Node typedApply(RegisterRequestNode input) {
      ImmutableMap.Builder<String, DataflowOperationContext> ptransformIdToOperationContextBuilder = ImmutableMap.builder();
      ImmutableMap.Builder<String, DataflowStepContext> ptransformIdToStepContext = ImmutableMap.builder();
      for (Map.Entry<String, NameContext> entry : input.getPTransformIdToPartialNameContextMap().entrySet()) {
        NameContext fullNameContext = NameContext.create(stageName, entry.getValue().originalName(), entry.getValue().systemName(), entry.getValue().userName());
        DataflowOperationContext operationContext = executionContext.createOperationContext(fullNameContext);
        ptransformIdToOperationContextBuilder.put(entry.getKey(), operationContext);
        ptransformIdToStepContext.put(entry.getKey(), executionContext.getStepContext(operationContext));
      }
      ImmutableMap.Builder<String, NameContext> pcollectionIdToNameContext = ImmutableMap.builder();
      for (Map.Entry<String, NameContext> entry : input.getPCollectionToPartialNameContextMap().entrySet()) {
        pcollectionIdToNameContext.put(entry.getKey(), NameContext.create(stageName, entry.getValue().originalName(), entry.getValue().systemName(), entry.getValue().userName()));
      }
      ImmutableMap<String, DataflowOperationContext> ptransformIdToOperationContexts = ptransformIdToOperationContextBuilder.build();
      ImmutableMap<String, SideInputReader> ptransformIdToSideInputReaders = buildPTransformIdToSideInputReadersMap(executionContext, input, ptransformIdToOperationContexts);
      ImmutableTable<String, String, PCollectionView<?>> ptransformIdToSideInputIdToPCollectionView = buildPTransformIdToSideInputIdToPCollectionView(input);
      // TODO: Set the NameContext properly for these operations.
      return OperationNode.create(new RegisterAndProcessBundleOperation(idGenerator, instructionRequestHandler, beamFnStateDelegator, input.getRegisterRequest(), ptransformIdToOperationContexts, ptransformIdToStepContext.build(), ptransformIdToSideInputReaders, ptransformIdToSideInputIdToPCollectionView, pcollectionIdToNameContext.build(), executionContext.createOperationContext(NameContext.create(stageName, stageName, stageName, stageName))));
    }
  };
}
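A minimal sketch of the partial-to-full NameContext completion performed in typedApply above. The name strings are hypothetical; the accessors (originalName(), systemName(), userName()) are the ones used in the code:

  // A partial NameContext as produced by RegisterNodeFunction (stage name is null).
  NameContext partial = NameContext.create(null, "originalName", "systemName", "userName");
  // Completed with the stage name, as done for each entry in the partial-name-context maps.
  NameContext full = NameContext.create("myStage", partial.originalName(), partial.systemName(), partial.userName());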