Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
Class FlattenRunnerTest, method testFlattenWithDuplicateInputCollectionProducesMultipleOutputs.
/**
* Create a Flatten that consumes data from the same PCollection duplicated through two outputs
* and validates that inputs are flattened together and directed to the output.
*/
@Test
public void testFlattenWithDuplicateInputCollectionProducesMultipleOutputs() throws Exception {
  String pTransformId = "pTransformId";
  String mainOutputId = "101";
  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
          .build();
  RunnerApi.PTransform pTransform =
      RunnerApi.PTransform.newBuilder()
          .setSpec(functionSpec)
          .putInputs("inputA", "inputATarget")
          .putInputs("inputAAgain", "inputATarget")
          .putOutputs(mainOutputId, "mainOutputTarget")
          .build();
  RunnerApi.PCollection pCollection =
      RunnerApi.PCollection.newBuilder().setUniqueName("inputATarget").setCoderId("coder-id").build();
  RunnerApi.Coder coder = CoderTranslation.toProto(StringUtf8Coder.of()).getCoder();
  PTransformRunnerFactoryTestContext context =
      PTransformRunnerFactoryTestContext.builder(pTransformId, pTransform)
          .processBundleInstructionId("57")
          .pCollections(Collections.singletonMap("inputATarget", pCollection))
          .coders(Collections.singletonMap("coder-id", coder))
          .build();
  List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
  context.addPCollectionConsumer(
      "mainOutputTarget",
      (FnDataReceiver) (FnDataReceiver<WindowedValue<String>>) mainOutputValues::add,
      StringUtf8Coder.of());
  new FlattenRunner.Factory<>().createRunnerForPTransform(context);
  mainOutputValues.clear();

  assertThat(
      context.getPCollectionConsumers().keySet(),
      containsInAnyOrder("inputATarget", "mainOutputTarget"));
  assertThat(context.getPCollectionConsumers().get("inputATarget"), hasSize(2));

  FnDataReceiver<WindowedValue<?>> input = context.getPCollectionConsumer("inputATarget");
  input.accept(WindowedValue.valueInGlobalWindow("A1"));
  input.accept(WindowedValue.valueInGlobalWindow("A2"));
  assertThat(
      mainOutputValues,
      containsInAnyOrder(
          valueInGlobalWindow("A1"), valueInGlobalWindow("A1"),
          valueInGlobalWindow("A2"), valueInGlobalWindow("A2")));
}
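What the test exercises most directly is the RunnerApi.PTransform proto itself: a transform's inputs map may list the same PCollection id under several local names, and FlattenRunner registers one consumer per entry. The following is a minimal, self-contained sketch of just that proto shape; the ids "a", "b", "pc-1", and "pc-out" are invented for illustration, and the literal URN is the value the test reaches through PTransformTranslation.FLATTEN_TRANSFORM_URN.

import org.apache.beam.model.pipeline.v1.RunnerApi;

public class FlattenProtoSketch {

  public static void main(String[] args) {
    // Build a Flatten PTransform whose two inputs point at the same PCollection id.
    RunnerApi.PTransform flatten =
        RunnerApi.PTransform.newBuilder()
            .setSpec(
                RunnerApi.FunctionSpec.newBuilder().setUrn("beam:transform:flatten:v1").build())
            .putInputs("a", "pc-1")
            .putInputs("b", "pc-1") // same PCollection id under a second local name
            .putOutputs("out", "pc-out")
            .build();
    // Both entries survive in the proto, which is why the test above expects two registered
    // consumers for "inputATarget" and each element to appear twice on the output.
    System.out.println(flatten.getInputsMap()); // prints both entries, e.g. {a=pc-1, b=pc-1}
  }
}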
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
Class ToStringFnRunnerTest, method testPrimitiveToString.
@Test
public void testPrimitiveToString() throws Exception {
  String pTransformId = "pTransformId";
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().build();
  ThrowingFunction<KV<String, Integer>, KV<String, String>> toStringFunction =
      ToStringFnRunner.createToStringFunctionForPTransform(pTransformId, pTransform);
  KV<String, Integer> input = KV.of("key", 12345);
  assertEquals(KV.of("key", "12345"), toStringFunction.apply(input));
}
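The generated function simply calls toString() on the KV value and passes the key through unchanged, so it works for any value type, not just Integer. A fragment in the same style as the test above, under that assumption; the id "toStringId" and the List-valued input are invented for illustration.

RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().build();
ThrowingFunction<KV<String, List<Integer>>, KV<String, String>> fn =
    ToStringFnRunner.createToStringFunctionForPTransform("toStringId", pTransform);
// List.toString() yields "[1, 2, 3]", so that is what lands on the value side of the output KV.
assertEquals(KV.of("key", "[1, 2, 3]"), fn.apply(KV.of("key", Arrays.asList(1, 2, 3))));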
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
Class ToStringFnRunnerTest, method testToStringOverride.
@Test
public void testToStringOverride() throws Exception {
  class ClassWithToStringOverride {

    @Override
    public String toString() {
      return "Some string";
    }
  }
  String pTransformId = "pTransformId";
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().build();
  ThrowingFunction<KV<String, ClassWithToStringOverride>, KV<String, String>> toStringFunction =
      ToStringFnRunner.createToStringFunctionForPTransform(pTransformId, pTransform);
  KV<String, ClassWithToStringOverride> input = KV.of("key", new ClassWithToStringOverride());
  assertEquals(KV.of("key", "Some string"), toStringFunction.apply(input));
}
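Conversely, a value type with no toString() override presumably falls through to the default Object.toString() form. A contrast sketch in the same style; the local class and ids here are hypothetical.

class NoToStringOverride {
}

ThrowingFunction<KV<String, NoToStringOverride>, KV<String, String>> fn =
    ToStringFnRunner.createToStringFunctionForPTransform(
        "pTransformId", RunnerApi.PTransform.newBuilder().build());
NoToStringOverride value = new NoToStringOverride();
// Whatever value.toString() returns (e.g. "NoToStringOverride@1a2b3c") ends up in the output KV.
assertEquals(KV.of("key", value.toString()), fn.apply(KV.of("key", value)));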
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
Class ProcessBundleHandler, method createBundleProcessor.
private BundleProcessor createBundleProcessor(
    String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
  BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
  SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
  MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
  ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
  PCollectionConsumerRegistry pCollectionConsumerRegistry =
      new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
  HashSet<String> processedPTransformIds = new HashSet<>();
  PTransformFunctionRegistry startFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
  PTransformFunctionRegistry finishFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
  List<ThrowingRunnable> resetFunctions = new ArrayList<>();
  List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
  List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();

  // Build a multimap of PCollection ids to PTransform ids which consume said PCollections
  for (Map.Entry<String, RunnerApi.PTransform> entry :
      bundleDescriptor.getTransformsMap().entrySet()) {
    for (String pCollectionId : entry.getValue().getInputsMap().values()) {
      pCollectionIdsToConsumingPTransforms.put(pCollectionId, entry.getKey());
    }
  }

  // Instantiate a State API call handler depending on whether a State ApiServiceDescriptor was
  // specified.
  HandleStateCallsForBundle beamFnStateClient;
  if (bundleDescriptor.hasStateApiServiceDescriptor()) {
    BeamFnStateClient underlyingClient =
        beamFnStateGrpcClientCache.forApiServiceDescriptor(
            bundleDescriptor.getStateApiServiceDescriptor());
    beamFnStateClient = new BlockTillStateCallsFinish(underlyingClient);
  } else {
    beamFnStateClient = new FailAllStateCallsForBundle(processBundleRequest);
  }

  BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
  Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
  BundleFinalizer bundleFinalizer =
      new BundleFinalizer() {

        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
          bundleFinalizationCallbackRegistrations.add(
              CallbackRegistration.create(callbackExpiry, callback));
        }
      };
  BundleProcessor bundleProcessor =
      BundleProcessor.create(
          processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry,
          resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener,
          pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient,
          bundleFinalizationCallbackRegistrations, runnerCapabilities);
  // Create runners and consumers starting from the root PTransforms (data input/output and
  // sources); downstream PTransforms are created recursively as consumers of their outputs.
  for (Map.Entry<String, RunnerApi.PTransform> entry :
      bundleDescriptor.getTransformsMap().entrySet()) {
    // TODO: Remove source as a root and have it be triggered by the Runner.
    if (!DATA_INPUT_URN.equals(entry.getValue().getSpec().getUrn())
        && !DATA_OUTPUT_URN.equals(entry.getValue().getSpec().getUrn())
        && !JAVA_SOURCE_URN.equals(entry.getValue().getSpec().getUrn())
        && !PTransformTranslation.READ_TRANSFORM_URN.equals(entry.getValue().getSpec().getUrn())) {
      continue;
    }
    createRunnerAndConsumersForPTransformRecursively(
        beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(),
        bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens,
        bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms,
        pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry,
        finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add,
        (apiServiceDescriptor, dataEndpoint) -> {
          if (!bundleProcessor
              .getInboundEndpointApiServiceDescriptors()
              .contains(apiServiceDescriptor)) {
            bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
          }
          bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
        },
        (timerEndpoint) -> {
          if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
            throw new IllegalStateException(
                String.format(
                    "Timers are unsupported because the ProcessBundleRequest %s does not provide"
                        + " a timer ApiServiceDescriptor.",
                    bundleId));
          }
          bundleProcessor.getTimerEndpoints().add(timerEndpoint);
        },
        progressRequestCallbacks::add, splitListener, bundleFinalizer,
        bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(),
        bundleProcessor.getRunnerCapabilities());
  }
  bundleProcessor.finish();
  return bundleProcessor;
}
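The first loop in createBundleProcessor simply inverts each PTransform's inputs map so that, later, every consumer of a PCollection can be looked up by PCollection id. A toy sketch of that step in isolation; the transform ids "ptA", "ptB" and the PCollection id "pc-1" are invented, and SetMultimap/HashMultimap are the same (vendored Guava) classes used above.

SetMultimap<String, String> consumers = HashMultimap.create();
Map<String, RunnerApi.PTransform> transforms = new HashMap<>();
transforms.put("ptA", RunnerApi.PTransform.newBuilder().putInputs("in", "pc-1").build());
transforms.put("ptB", RunnerApi.PTransform.newBuilder().putInputs("in", "pc-1").build());
for (Map.Entry<String, RunnerApi.PTransform> entry : transforms.entrySet()) {
  for (String pCollectionId : entry.getValue().getInputsMap().values()) {
    consumers.put(pCollectionId, entry.getKey());
  }
}
// consumers.get("pc-1") now contains both "ptA" and "ptB": every PTransform consuming a given
// PCollection can be found when its FnDataReceivers are wired up.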
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
Class CombineRunners, method createCombineGroupedValuesMapFunction.
static <KeyT, InputT, AccumT, OutputT>
    ThrowingFunction<KV<KeyT, Iterable<InputT>>, KV<KeyT, OutputT>>
        createCombineGroupedValuesMapFunction(String pTransformId, PTransform pTransform)
            throws IOException {
  CombinePayload combinePayload = CombinePayload.parseFrom(pTransform.getSpec().getPayload());
  CombineFn<InputT, AccumT, OutputT> combineFn =
      (CombineFn)
          SerializableUtils.deserializeFromByteArray(
              combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn");
  return (KV<KeyT, Iterable<InputT>> input) -> {
    return KV.of(input.getKey(), combineFn.apply(input.getValue()));
  };
}
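The returned lambda only pairs the original key with combineFn.apply(...) over the grouped values. A sketch of that per-element mapping with a concrete CombineFn standing in for the one deserialized from the CombinePayload; Sum.ofIntegers() from the Beam SDK is assumed here, and the input KV is invented.

CombineFn<Integer, ?, Integer> sumFn = Sum.ofIntegers();
KV<String, Iterable<Integer>> grouped = KV.of("key", Arrays.asList(1, 2, 3));
// Mirrors what the generated function does for each element: same key, iterable reduced by the fn.
KV<String, Integer> combined = KV.of(grouped.getKey(), sumFn.apply(grouped.getValue()));
// combined is KV.of("key", 6)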