use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.
the class BeamFnDataGrpcClientTest method testForInboundConsumer.
/**
 * Registers two inbound receivers (instruction ids A and B) against a single in-process data
 * service and checks that each receiver only sees the values addressed to it.
 *
 * <p>The server pushes data for B before B's receiver is registered, to show that early data is
 * not dropped, and B is awaited before A, to show that streams may complete out of order.
 *
 * <p>Both the in-process server and the client channel are shut down when the test finishes,
 * whether it passes or fails.
 */
@Test
public void testForInboundConsumer() throws Exception {
  CountDownLatch waitForClientToConnect = new CountDownLatch(1);
  Collection<WindowedValue<String>> inboundValuesA = new ConcurrentLinkedQueue<>();
  Collection<WindowedValue<String>> inboundValuesB = new ConcurrentLinkedQueue<>();
  Collection<BeamFnApi.Elements> inboundServerValues = new ConcurrentLinkedQueue<>();
  // Filled in by the fake service once the client opens its data stream.
  AtomicReference<StreamObserver<BeamFnApi.Elements>> outboundServerObserver =
      new AtomicReference<>();
  CallStreamObserver<BeamFnApi.Elements> inboundServerObserver =
      TestStreams.withOnNext(inboundServerValues::add).build();

  // A random suffix keeps the in-process server name unique per test run.
  Endpoints.ApiServiceDescriptor apiServiceDescriptor =
      Endpoints.ApiServiceDescriptor.newBuilder()
          .setUrl(this.getClass().getName() + "-" + UUID.randomUUID())
          .build();
  Server server =
      InProcessServerBuilder.forName(apiServiceDescriptor.getUrl())
          .addService(
              new BeamFnDataGrpc.BeamFnDataImplBase() {
                @Override
                public StreamObserver<BeamFnApi.Elements> data(
                    StreamObserver<BeamFnApi.Elements> outboundObserver) {
                  outboundServerObserver.set(outboundObserver);
                  waitForClientToConnect.countDown();
                  return inboundServerObserver;
                }
              })
          .build();
  server.start();
  try {
    ManagedChannel channel =
        InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
    try {
      BeamFnDataGrpcClient clientFactory =
          new BeamFnDataGrpcClient(
              PipelineOptionsFactory.create(),
              (Endpoints.ApiServiceDescriptor descriptor) -> channel,
              OutboundObserverFactory.trivial());

      BeamFnDataInboundObserver2 observerA =
          BeamFnDataInboundObserver2.forConsumers(
              Arrays.asList(DataEndpoint.create(TRANSFORM_ID_A, CODER, inboundValuesA::add)),
              Collections.emptyList());
      BeamFnDataInboundObserver2 observerB =
          BeamFnDataInboundObserver2.forConsumers(
              Arrays.asList(DataEndpoint.create(TRANSFORM_ID_B, CODER, inboundValuesB::add)),
              Collections.emptyList());

      clientFactory.registerReceiver(
          INSTRUCTION_ID_A, Arrays.asList(apiServiceDescriptor), observerA);

      waitForClientToConnect.await();
      outboundServerObserver.get().onNext(ELEMENTS_A_1);
      // Purposefully transmit some data before the consumer for B is bound showing that
      // data is not lost
      outboundServerObserver.get().onNext(ELEMENTS_B_1);
      // Gives the client time to receive the B elements before B's receiver exists.
      Thread.sleep(100);

      clientFactory.registerReceiver(
          INSTRUCTION_ID_B, Arrays.asList(apiServiceDescriptor), observerB);

      // Show that out of order stream completion can occur.
      // NOTE(review): presumably ELEMENTS_B_1 already ends B's stream, since nothing else is
      // sent for B before awaiting it - confirm against the constant's definition.
      observerB.awaitCompletion();
      assertThat(inboundValuesB, contains(valueInGlobalWindow("JKL"), valueInGlobalWindow("MNO")));

      outboundServerObserver.get().onNext(ELEMENTS_A_2);
      observerA.awaitCompletion();
      assertThat(
          inboundValuesA,
          contains(
              valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF"), valueInGlobalWindow("GHI")));
    } finally {
      // Release the client channel; otherwise it outlives the test.
      channel.shutdownNow();
    }
  } finally {
    server.shutdownNow();
  }
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.
the class SparkExecutableStageFunction method getStateRequestHandler.
/**
 * Assembles the {@link StateRequestHandler} used for the given stage.
 *
 * <p>Side-input state requests (iterable, multimap and multimap-keys) are answered from the
 * {@code sideInputs} map; bag user state requests are answered by {@code
 * bagUserStateHandlerFactory} when the stage declares user state, and are unsupported otherwise.
 *
 * @param executableStage stage whose side inputs and user states determine the handlers
 * @param processBundleDescriptor descriptor passed to the bag user state handler
 * @return a handler that dispatches on the state key type
 * @throws RuntimeException if the side input handler cannot be created
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
  // Decodes the broadcast byte arrays of a PCollection with the coder stored next to them.
  BatchSideInputHandlerFactory.SideInputGetter broadcastGetter =
      new BatchSideInputHandlerFactory.SideInputGetter() {
        @Override
        public <T> List<T> getSideInput(String pCollectionId) {
          Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> entry =
              sideInputs.get(pCollectionId);
          Broadcast<List<byte[]>> encodedValues = entry._1;
          WindowedValueCoder<SideInputT> valueCoder = entry._2;
          return (List<T>)
              encodedValues.value().stream()
                  .map(bytes -> CoderHelpers.fromByteArray(bytes, valueCoder))
                  .collect(Collectors.toList());
        }
      };
  StateRequestHandlers.SideInputHandlerFactory sideInputFactory =
      BatchSideInputHandlerFactory.forStage(executableStage, broadcastGetter);

  final StateRequestHandler sideInputHandler;
  try {
    sideInputHandler =
        StateRequestHandlers.forSideInputHandlerFactory(
            ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
  } catch (IOException e) {
    throw new RuntimeException("Failed to setup state handler", e);
  }

  // Lazily create the user state factory on first use.
  if (bagUserStateHandlerFactory == null) {
    bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
  }

  final StateRequestHandler userStateHandler;
  if (executableStage.getUserStates().isEmpty()) {
    userStateHandler = StateRequestHandler.unsupported();
  } else {
    // Need to discard the old key's state
    bagUserStateHandlerFactory.resetForNewKey();
    userStateHandler =
        StateRequestHandlers.forBagUserStateHandlerFactory(
            processBundleDescriptor, bagUserStateHandlerFactory);
  }

  EnumMap<TypeCase, StateRequestHandler> handlersByType =
      new EnumMap<>(StateKey.TypeCase.class);
  handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
  return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.
the class ParDoTranslation method translateParDo.
/**
 * Translates a {@link ParDo.MultiOutput} into a {@link ParDoPayload}.
 *
 * <p>If the {@link DoFn} is splittable, a {@link KvCoder} of its restriction coder and its
 * watermark estimator state coder is registered with {@code components} and the resulting id is
 * reported as the restriction coder id; otherwise the restriction coder id is the empty string.
 *
 * @param parDo the transform to translate
 * @param mainInput the main input; its coder must be a {@link KvCoder} when the {@link DoFn}
 *     uses state or timers
 * @param doFnSchemaInformation schema information forwarded to the {@link DoFn} translation
 * @param pipeline supplies the coder registry used to obtain the restriction and watermark
 *     estimator state coders
 * @param components the components the payload is built against
 * @return the payload produced by {@code payloadForParDoLike}
 * @throws IOException declared by the coder registration and payload construction calls
 * @throws IllegalArgumentException (via {@code checkArgument}) if the {@link DoFn} uses state or
 *     timers but the main input coder is not a {@link KvCoder}
 */
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
  final DoFn<?, ?> doFn = parDo.getFn();
  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  final String restrictionCoderId;
  if (signature.processElement().isSplittable()) {
    DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
    final Coder<?> restrictionAndWatermarkStateCoder =
        KvCoder.of(
            doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()),
            doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
    restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
  } else {
    restrictionCoderId = "";
  }

  Coder<BoundedWindow> windowCoder =
      (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();
  // The key coder is only known for keyed (KV) input; it stays null otherwise.
  Coder<?> keyCoder;
  if (signature.usesState() || signature.usesTimers()) {
    checkArgument(
        mainInput.getCoder() instanceof KvCoder,
        "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s",
        mainInput.getCoder());
    keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
  } else {
    keyCoder = null;
  }

  return payloadForParDoLike(
      new ParDoLike() {
        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
          return ParDoTranslation.translateDoFn(
              parDo.getFn(),
              parDo.getMainOutputTag(),
              parDo.getSideInputs(),
              doFnSchemaInformation,
              newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
          Map<String, SideInput> sideInputs = new HashMap<>();
          for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
            sideInputs.put(
                sideInput.getTagInternal().getId(), translateView(sideInput, components));
          }
          return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components)
            throws IOException {
          Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
          for (Map.Entry<String, StateDeclaration> state :
              signature.stateDeclarations().entrySet()) {
            RunnerApi.StateSpec spec =
                translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
            stateSpecs.put(state.getKey(), spec);
          }
          return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
          Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();

          for (Map.Entry<String, TimerDeclaration> timer :
              signature.timerDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec =
                translateTimerFamilySpec(
                    getTimerSpecOrThrow(timer.getValue(), doFn),
                    newComponents,
                    keyCoder,
                    windowCoder);
            timerFamilySpecs.put(timer.getKey(), spec);
          }

          for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily :
              signature.timerFamilyDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec =
                translateTimerFamilySpec(
                    DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn),
                    newComponents,
                    keyCoder,
                    windowCoder);
            timerFamilySpecs.put(timerFamily.getKey(), spec);
          }

          String onWindowExpirationTimerFamilySpec = null;
          if (signature.onWindowExpiration() != null) {
            // Register the timer coder with the components handed to this callback, the same
            // instance the timer and timer family specs above are translated against.
            RunnerApi.TimerFamilySpec spec =
                RunnerApi.TimerFamilySpec.newBuilder()
                    .setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME))
                    .setTimerFamilyCoderId(
                        registerCoderOrThrow(newComponents, Timer.Coder.of(keyCoder, windowCoder)))
                    .build();
            // Pick the first "onWindowExpiration<i>" id not already used by a declared timer.
            for (int i = 0; i < Integer.MAX_VALUE; ++i) {
              onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
              if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                break;
              }
            }
            timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
          }

          return ParDoLikeTimerFamilySpecs.create(
              timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
          return !signature.stateDeclarations().isEmpty()
              || !signature.timerDeclarations().isEmpty()
              || !signature.timerFamilyDeclarations().isEmpty()
              || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
          return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
          return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
          return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
          // True when any of startBundle, processElement or finishBundle takes a bundle finalizer.
          return (signature.startBundle() != null
                  && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer()))
              || (signature.processElement() != null
                  && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer()))
              || (signature.finishBundle() != null
                  && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
          // Already registered above, before the payload is built.
          return restrictionCoderId;
        }
      },
      components);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.
the class ParDoTranslation method fromProto.
/**
 * Rebuilds a Java {@link StateSpec} from its proto form, resolving coder ids through {@code
 * components}.
 *
 * @param stateSpec the proto state spec to convert
 * @param components source of the coders referenced by id in {@code stateSpec}
 * @return the corresponding value, bag, combining, map or set state spec
 * @throws IOException declared by the coder lookups
 * @throws UnsupportedOperationException if a combining spec's combine function does not use the
 *     Java serialized CombineFn URN
 * @throws IllegalArgumentException if no spec is set or the spec kind is not recognized
 */
@VisibleForTesting
static StateSpec<?> fromProto(RunnerApi.StateSpec stateSpec, RehydratedComponents components) throws IOException {
  switch (stateSpec.getSpecCase()) {
    case READ_MODIFY_WRITE_SPEC: {
      String valueCoderId = stateSpec.getReadModifyWriteSpec().getCoderId();
      return StateSpecs.value(components.getCoder(valueCoderId));
    }
    case BAG_SPEC: {
      String elementCoderId = stateSpec.getBagSpec().getElementCoderId();
      return StateSpecs.bag(components.getCoder(elementCoderId));
    }
    case COMBINING_SPEC: {
      FunctionSpec combineFnSpec = stateSpec.getCombiningSpec().getCombineFn();
      String combineFnUrn = combineFnSpec.getUrn();
      boolean isJavaCombineFn =
          CombineTranslation.JAVA_SERIALIZED_COMBINE_FN_URN.equals(combineFnUrn);
      if (!isJavaCombineFn) {
        throw new UnsupportedOperationException(
            String.format(
                "Cannot create %s from non-Java %s: %s",
                StateSpec.class.getSimpleName(),
                Combine.CombineFn.class.getSimpleName(),
                combineFnUrn));
      }
      Combine.CombineFn<?, ?, ?> combineFn =
          (Combine.CombineFn<?, ?, ?>)
              SerializableUtils.deserializeFromByteArray(
                  combineFnSpec.getPayload().toByteArray(),
                  Combine.CombineFn.class.getSimpleName());
      String accumulatorCoderId = stateSpec.getCombiningSpec().getAccumulatorCoderId();
      // Raw coder cast: the accumulator coder's type parameter cannot be tied to the
      // wildcard-typed CombineFn at compile time.
      return StateSpecs.combining((Coder) components.getCoder(accumulatorCoderId), combineFn);
    }
    case MAP_SPEC: {
      String keyCoderId = stateSpec.getMapSpec().getKeyCoderId();
      String valueCoderId = stateSpec.getMapSpec().getValueCoderId();
      return StateSpecs.map(components.getCoder(keyCoderId), components.getCoder(valueCoderId));
    }
    case SET_SPEC: {
      String elementCoderId = stateSpec.getSetSpec().getElementCoderId();
      return StateSpecs.set(components.getCoder(elementCoderId));
    }
    case SPEC_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format("Unknown %s: %s", RunnerApi.StateSpec.class.getName(), stateSpec));
  }
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Coder in project beam by apache.
the class PipelineValidator method validateComponents.
/**
 * Checks the internal consistency of {@code components}.
 *
 * <p>Verifies that transform unique names are not shared, that every PCollection has a non-empty
 * and unshared unique name and refers to a known coder and windowing strategy, that every
 * windowing strategy refers to a known window coder, and that every coder's component coders
 * are known. Each transform is additionally passed to {@code validateTransform}.
 *
 * @param context prefix used in every failure message
 * @param components the components to validate
 * @param requirements forwarded to {@code validateTransform}
 * @throws IllegalArgumentException (via {@code checkArgument}) on the first violated check
 */
private static void validateComponents(String context, Components components, Set<String> requirements) {
  // Transforms: unique_name -> id of the transform that claimed it.
  Map<String, String> transformIdsByName = Maps.newHashMap();
  for (Map.Entry<String, PTransform> entry : components.getTransformsMap().entrySet()) {
    String transformId = entry.getKey();
    PTransform transform = entry.getValue();
    String uniqueName = transform.getUniqueName();
    String clashingId = transformIdsByName.put(uniqueName, transformId);
    // A transform is allowed to not have unique_name set, but, obviously,
    // there can be only one such transform with an empty name.
    // It's allowed for the (only) root transform to have the empty unique_name.
    checkArgument(
        clashingId == null,
        "%s: Transforms %s and %s both have unique_name \"%s\"",
        context,
        transformId,
        clashingId,
        uniqueName);
    validateTransform(transformId, transform, components, requirements);
  }

  // PCollections: unique_name -> id of the PCollection that claimed it.
  Map<String, String> pcollectionIdsByName = Maps.newHashMap();
  for (Map.Entry<String, PCollection> entry : components.getPcollectionsMap().entrySet()) {
    String pcollectionId = entry.getKey();
    PCollection pc = entry.getValue();
    checkArgument(
        !pc.getUniqueName().isEmpty(),
        "%s: PCollection %s does not have a unique_name set",
        context,
        pcollectionId);
    String clashingId = pcollectionIdsByName.put(pc.getUniqueName(), pcollectionId);
    checkArgument(
        clashingId == null,
        "%s: PCollections %s and %s both have unique_name \"%s\"",
        context,
        pcollectionId,
        clashingId,
        pc.getUniqueName());
    checkArgument(
        components.containsCoders(pc.getCoderId()),
        "%s: PCollection %s uses unknown coder %s",
        context,
        pcollectionId,
        pc.getCoderId());
    checkArgument(
        components.containsWindowingStrategies(pc.getWindowingStrategyId()),
        "%s: PCollection %s uses unknown windowing strategy %s",
        context,
        pcollectionId,
        pc.getWindowingStrategyId());
  }

  // Windowing strategies must point at a registered window coder.
  for (Map.Entry<String, WindowingStrategy> entry :
      components.getWindowingStrategiesMap().entrySet()) {
    String windowCoderId = entry.getValue().getWindowCoderId();
    checkArgument(
        components.containsCoders(windowCoderId),
        "%s: WindowingStrategy %s uses unknown coder %s",
        context,
        entry.getKey(),
        windowCoderId);
  }

  // Coders may only be composed of registered coders.
  components
      .getCodersMap()
      .forEach(
          (coderId, coder) -> {
            for (String componentCoderId : coder.getComponentCoderIdsList()) {
              checkArgument(
                  components.containsCoders(componentCoderId),
                  "%s: Coder %s uses unknown component coder %s",
                  context,
                  coderId,
                  componentCoderId);
            }
          });
}
Aggregations