use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class FlattenRunnerTest method testFlattenWithDuplicateInputCollectionProducesMultipleOutputs.
/**
* Create a Flatten that consumes data from the same PCollection duplicated through two outputs
* and validates that inputs are flattened together and directed to the output.
*/
@Test
public void testFlattenWithDuplicateInputCollectionProducesMultipleOutputs() throws Exception {
String pTransformId = "pTransformId";
String mainOutputId = "101";
RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN).build();
RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().setSpec(functionSpec).putInputs("inputA", "inputATarget").putInputs("inputAAgain", "inputATarget").putOutputs(mainOutputId, "mainOutputTarget").build();
RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder().setUniqueName("inputATarget").setCoderId("coder-id").build();
RunnerApi.Coder coder = CoderTranslation.toProto(StringUtf8Coder.of()).getCoder();
PTransformRunnerFactoryTestContext context = PTransformRunnerFactoryTestContext.builder(pTransformId, pTransform).processBundleInstructionId("57").pCollections(Collections.singletonMap("inputATarget", pCollection)).coders(Collections.singletonMap("coder-id", coder)).build();
List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
context.addPCollectionConsumer("mainOutputTarget", (FnDataReceiver) (FnDataReceiver<WindowedValue<String>>) mainOutputValues::add, StringUtf8Coder.of());
new FlattenRunner.Factory<>().createRunnerForPTransform(context);
mainOutputValues.clear();
assertThat(context.getPCollectionConsumers().keySet(), containsInAnyOrder("inputATarget", "mainOutputTarget"));
assertThat(context.getPCollectionConsumers().get("inputATarget"), hasSize(2));
FnDataReceiver<WindowedValue<?>> input = context.getPCollectionConsumer("inputATarget");
input.accept(WindowedValue.valueInGlobalWindow("A1"));
input.accept(WindowedValue.valueInGlobalWindow("A2"));
assertThat(mainOutputValues, containsInAnyOrder(valueInGlobalWindow("A1"), valueInGlobalWindow("A1"), valueInGlobalWindow("A2"), valueInGlobalWindow("A2")));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class ProcessBundleHandler method createBundleProcessor.
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
HashSet<String> processedPTransformIds = new HashSet<>();
PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
List<ThrowingRunnable> resetFunctions = new ArrayList<>();
List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();
// Build a multimap of PCollection ids to PTransform ids which consume said PCollections
for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
for (String pCollectionId : entry.getValue().getInputsMap().values()) {
pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
}
}
// Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
// specified.
HandleStateCallsForBundle beamFnStateClient;
if (bundleDescriptor.hasStateApiServiceDescriptor()) {
BeamFnStateClient underlyingClient = beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor());
beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
} else {
beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
}
BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
BundleFinalizer bundleFinalizer = new BundleFinalizer() {
@Override
public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
}
};
BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);
// Create a BeamFnStateClient
for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
// TODO: Remove source as a root and have it be triggered by the Runner.
if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn()) && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn()) && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
continue;
}
createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
}
bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
}, (timerEndpoint) -> {
if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
}
bundleProcessor.getTimerEndpoints().add(timerEndpoint);
}, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
}
bundleProcessor.finish();
return bundleProcessor;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopicOverridden.
@Test
public void testTranslateSinkWithTopicOverridden() throws Exception {
ValueProvider<TopicPath> runtimeProvider = pipeline.newProvider(TOPIC);
PubsubUnboundedSink pubsubUnboundedSinkSink = new PubsubUnboundedSink(null, runtimeProvider, TIMESTAMP_ATTRIBUTE, ID_ATTRIBUTE, 0, 0, 0, Duration.ZERO, null);
PubsubSink pubsubSink = new PubsubSink(pubsubUnboundedSinkSink);
PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
PDone output = input.apply(pubsubSink);
AppliedPTransform<?, ?, PubsubSink> appliedPTransform = AppliedPTransform.of("sink", PValues.expandInput(input), PValues.expandOutput(output), pubsubSink, ResourceHints.create(), pipeline);
SdkComponents components = SdkComponents.create();
components.registerEnvironment(Environments.createDockerEnvironment("java"));
RunnerApi.FunctionSpec spec = sinkTranslator.translate(appliedPTransform, components);
assertEquals(PTransformTranslation.PUBSUB_WRITE, spec.getUrn());
PubSubWritePayload payload = PubSubWritePayload.parseFrom(spec.getPayload());
assertEquals(((NestedValueProvider) runtimeProvider).propertyName(), payload.getTopicRuntimeOverridden());
assertTrue(payload.getTopic().isEmpty());
assertEquals(TIMESTAMP_ATTRIBUTE, payload.getTimestampAttribute());
assertEquals(ID_ATTRIBUTE, payload.getIdAttribute());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopic.
@Test
public void testTranslateSinkWithTopic() throws Exception {
PubsubUnboundedSink pubsubUnboundedSink = new PubsubUnboundedSink(null, StaticValueProvider.of(TOPIC), TIMESTAMP_ATTRIBUTE, ID_ATTRIBUTE, 0, 0, 0, Duration.ZERO, null);
PubsubUnboundedSink.PubsubSink pubsubSink = new PubsubSink(pubsubUnboundedSink);
PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
PDone output = input.apply(pubsubSink);
AppliedPTransform<?, ?, PubsubSink> appliedPTransform = AppliedPTransform.of("sink", PValues.expandInput(input), PValues.expandOutput(output), pubsubSink, ResourceHints.create(), pipeline);
SdkComponents components = SdkComponents.create();
components.registerEnvironment(Environments.createDockerEnvironment("java"));
RunnerApi.FunctionSpec spec = sinkTranslator.translate(appliedPTransform, components);
assertEquals(PTransformTranslation.PUBSUB_WRITE, spec.getUrn());
PubSubWritePayload payload = PubSubWritePayload.parseFrom(spec.getPayload());
assertEquals(TOPIC.getFullPath(), payload.getTopic());
assertTrue(payload.getTopicRuntimeOverridden().isEmpty());
assertEquals(TIMESTAMP_ATTRIBUTE, payload.getTimestampAttribute());
assertEquals(ID_ATTRIBUTE, payload.getIdAttribute());
}
Aggregations