use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class WindowMergingFnRunnerTest method createMergeTransformForWindowFn.
/**
 * Builds a {@code PTransform} proto whose spec uses the {@code WindowMergingFnRunner} URN and
 * whose payload is the serialized proto translation of the supplied {@code WindowFn}.
 *
 * @param windowFn the window function to embed in the transform's payload
 * @return a transform proto containing only the function spec
 */
private static <W extends BoundedWindow> RunnerApi.PTransform createMergeTransformForWindowFn(WindowFn<?, W> windowFn) throws Exception {
  // The translation needs a component registry with an environment registered on it.
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("test"));

  RunnerApi.FunctionSpec.Builder specBuilder = RunnerApi.FunctionSpec.newBuilder();
  specBuilder.setUrn(WindowMergingFnRunner.URN);
  specBuilder.setPayload(WindowingStrategyTranslation.toProto(windowFn, sdkComponents).toByteString());

  RunnerApi.PTransform.Builder transformBuilder = RunnerApi.PTransform.newBuilder();
  transformBuilder.setSpec(specBuilder.build());
  return transformBuilder.build();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class PubSubReadPayloadTranslationTest method testTranslateSourceToFunctionSpec.
/**
 * Applies the Pub/Sub read to the test pipeline, translates the resulting applied transform, and
 * checks both the URN and the decoded payload of the produced function spec.
 */
@Test
public void testTranslateSourceToFunctionSpec() throws Exception {
  PCollection<byte[]> readOutput = pipeline.apply(readFromPubSub);
  AppliedPTransform<?, ?, Read.Unbounded<byte[]>> applied =
      AppliedPTransform.of(
          "ReadFromPubsub",
          PValues.expandInput(pipeline.begin()),
          PValues.expandOutput(readOutput),
          readFromPubSub,
          ResourceHints.create(),
          pipeline);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec translated =
      sourceTranslator.translate((AppliedPTransform) applied, sdkComponents);

  // URN first, then the payload round-trip.
  assertEquals(PTransformTranslation.PUBSUB_READ, translated.getUrn());
  assertEquals(pubsubReadPayload, PubSubReadPayload.parseFrom(translated.getPayload()));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class ExpansionService method expand.
/**
 * Expands the transform described by {@code request} and returns the expanded transform together
 * with the components it introduced.
 *
 * <p>The steps are: rehydrate the request's input PCollections into a fresh pipeline, look up a
 * {@code TransformProvider} for the transform's URN (the Java class-lookup provider or a
 * registered one), apply it, translate the pipeline back to a proto, and pick out the single root
 * transform whose id was not present in the request.
 *
 * @param request the expansion request holding the transform, its components and the namespace
 *     used as prefix for newly generated ids
 * @return a response containing the expanded transform, the remaining components and the
 *     pipeline's requirements
 * @throws UnsupportedOperationException if no provider is registered for the transform's URN
 * @throws RuntimeException wrapping an {@code IOException} raised while rehydrating inputs or
 *     registering outputs
 */
@VisibleForTesting
/*package*/
ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
  LOG.info("Expanding '{}' with URN '{}'", request.getTransform().getUniqueName(), request.getTransform().getSpec().getUrn());
  LOG.debug("Full transform: {}", request.getTransform());
  // Remembered so the newly created root transform can be told apart from pre-existing ones.
  Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
  Pipeline pipeline = createPipeline();
  boolean isUseDeprecatedRead =
      ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read")
          || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
  if (!isUseDeprecatedRead) {
    ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10670): Remove this when we address performance issue.
    ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
  } else {
    LOG.warn("Using use_deprecated_read in portable runners is runner-dependent. The " + "ExpansionService will respect that, but if your runner does not have support for " + "native Read transform, your Pipeline will fail during Pipeline submission.");
  }

  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
  // Map each input tag of the requested transform to its rehydrated PCollection.
  Map<String, PCollection<?>> inputs =
      request.getTransform().getInputsMap().entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  input -> {
                    try {
                      return rehydratedComponents.getPCollection(input.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));

  String urn = request.getTransform().getSpec().getUrn();
  TransformProvider transformProvider;
  if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
    AllowList allowList = pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
    assert allowList != null;
    transformProvider = new JavaClassLookupTransformProvider(allowList);
  } else {
    transformProvider = getRegisteredTransforms().get(urn);
    if (transformProvider == null) {
      throw new UnsupportedOperationException("Unknown urn: " + urn);
    }
  }

  List<String> classpathResources =
      transformProvider.getDependencies(request.getTransform().getSpec(), pipeline.getOptions());
  pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);
  Map<String, PCollection<?>> outputs =
      transformProvider.apply(pipeline, request.getTransform().getUniqueName(), request.getTransform().getSpec(), inputs);

  // Needed to find which transform was new...
  SdkComponents sdkComponents =
      rehydratedComponents.getSdkComponents(Collections.emptyList()).withNewIdPrefix(request.getNamespace());
  sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(pipeline.getOptions().as(PortablePipelineOptions.class)));
  // Register every output PCollection and remember the id it was given.
  Map<String, String> outputMap =
      outputs.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  output -> {
                    try {
                      return sdkComponents.registerPCollection(output.getValue());
                    } catch (IOException exn) {
                      throw new RuntimeException(exn);
                    }
                  }));

  if (isUseDeprecatedRead) {
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
  }

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
  // Exactly one root transform is expected to be new; getOnlyElement fails otherwise.
  String expandedTransformId =
      Iterables.getOnlyElement(
          pipelineProto.getRootTransformIdsList().stream()
              .filter(id -> !existingTransformIds.contains(id))
              .collect(Collectors.toList()));
  RunnerApi.Components components = pipelineProto.getComponents();
  // The expanded transform is returned separately, named by its id and with outputs keyed by the
  // provider's output tags.
  RunnerApi.PTransform expandedTransform =
      components.getTransformsOrThrow(expandedTransformId).toBuilder()
          .setUniqueName(expandedTransformId)
          .clearOutputs()
          .putAllOutputs(outputMap)
          .build();
  LOG.debug("Expanded to {}", expandedTransform);
  return ExpansionApi.ExpansionResponse.newBuilder()
      .setComponents(components.toBuilder().removeTransforms(expandedTransformId))
      .setTransform(expandedTransform)
      .addAllRequirements(pipelineProto.getRequirementsList())
      .build();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class ExpansionServiceTest method testConstruct.
/**
 * Expands the test transform on top of a one-Impulse pipeline and checks the expanded
 * transform's name, input, output, subtransforms and the namespacing of new component ids.
 */
@Test
public void testConstruct() {
  Pipeline testPipeline = Pipeline.create();
  testPipeline.apply(Impulse.create());
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(testPipeline);
  RunnerApi.Components originalComponents = proto.getComponents();

  // The pipeline has a single transform with a single output; that output feeds the expansion.
  RunnerApi.PTransform onlyTransform =
      Iterables.getOnlyElement(originalComponents.getTransformsMap().values());
  String inputPcollId = Iterables.getOnlyElement(onlyTransform.getOutputsMap().values());

  RunnerApi.FunctionSpec.Builder specBuilder = RunnerApi.FunctionSpec.newBuilder().setUrn(TEST_URN);
  RunnerApi.PTransform.Builder transformBuilder =
      RunnerApi.PTransform.newBuilder()
          .setUniqueName(TEST_NAME)
          .setSpec(specBuilder)
          .putInputs("input", inputPcollId);
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(originalComponents)
          .setTransform(transformBuilder)
          .setNamespace(TEST_NAMESPACE)
          .build();

  ExpansionApi.ExpansionResponse response = expansionService.expand(request);
  RunnerApi.PTransform expanded = response.getTransform();
  assertEquals(TEST_NAMESPACE + TEST_NAME, expanded.getUniqueName());

  // Input and output wiring.
  assertThat(expanded.getInputsMap().values(), contains(inputPcollId));
  assertThat(expanded.getOutputsMap().keySet(), contains("output"));

  // Loose composite check: it has children and each child is present in the response.
  assertThat(expanded.getSubtransformsCount(), greaterThan(0));
  for (String childId : expanded.getSubtransformsList()) {
    assertTrue(response.getComponents().containsTransforms(childId));
  }

  // Every id in the response is either namespaced or was already in the request.
  Set<String> originalIds = allIds(request.getComponents());
  for (String componentId : allIds(response.getComponents())) {
    boolean namespacedOrPreexisting =
        componentId.startsWith(TEST_NAMESPACE) || originalIds.contains(componentId);
    assertTrue(componentId, namespacedOrPreexisting);
  }
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class ArtifactStagingService method reverseArtifactRetrievalService.
/**
 * Returns the observer that drives artifact staging for one client stream.
 *
 * <p>The returned observer is a state machine. The first message carries the staging token.
 * Then, for each environment registered under that token, the observer asks the client (through
 * {@code responseObserver}) to resolve the environment's artifacts, requests every resolved
 * artifact, and forwards the received chunks to a {@code StoreArtifact} task submitted to a
 * thread pool. When all environments are processed, the per-environment results are recorded in
 * {@code staged} and the request stream is completed.
 *
 * @param responseObserver stream on which resolve/get requests are sent to the client
 * @return the observer consuming the client's responses
 */
@Override
public StreamObserver<ArtifactApi.ArtifactResponseWrapper> reverseArtifactRetrievalService(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
  return new StreamObserver<ArtifactApi.ArtifactResponseWrapper>() {
    /**
     * The maximum number of parallel threads to use to stage.
     */
    public static final int THREAD_POOL_SIZE = 10;
    /**
     * The maximum number of bytes to buffer across all writes before throttling.
     */
    // 100 MB
    public static final int MAX_PENDING_BYTES = 100 << 20;

    IdGenerator idGenerator = IdGenerators.incrementingLongs();
    String stagingToken;
    // Environment id -> artifacts to resolve, looked up by staging token in START.
    Map<String, List<RunnerApi.ArtifactInformation>> toResolve;
    // Environment id -> one future per artifact being (or already) staged.
    Map<String, List<Future<RunnerApi.ArtifactInformation>>> stagedFutures;
    // Null until the START message has been accepted.
    ExecutorService stagingExecutor;
    OverflowingSemaphore totalPendingBytes;
    State state = State.START;
    Queue<String> pendingResolves;
    String currentEnvironment;
    Queue<RunnerApi.ArtifactInformation> pendingGets;
    // Chunks of the artifact currently being received; an empty ByteString marks EOF.
    BlockingQueue<ByteString> currentOutput;

    /**
     * Advances the state machine with the next client message.
     *
     * <p>Declared {@code synchronized}, so the observer's fields are only touched while holding
     * its monitor. The GET case intentionally falls through into GETCHUNK: the message that
     * starts an artifact also carries its first chunk.
     */
    @Override
    @SuppressFBWarnings(value = "SF_SWITCH_FALLTHROUGH", justification = "fallthrough intended")
    public synchronized void onNext(ArtifactApi.ArtifactResponseWrapper responseWrapper) {
      switch (state) {
        case START:
          stagingToken = responseWrapper.getStagingToken();
          LOG.info("Staging artifacts for {}.", stagingToken);
          toResolve = toStage.get(stagingToken);
          if (toResolve == null) {
            responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Unknown staging token " + stagingToken)));
            return;
          }
          stagedFutures = new ConcurrentHashMap<>();
          pendingResolves = new ArrayDeque<>();
          pendingResolves.addAll(toResolve.keySet());
          stagingExecutor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
          totalPendingBytes = new OverflowingSemaphore(MAX_PENDING_BYTES);
          resolveNextEnvironment(responseObserver);
          break;
        case RESOLVE:
          {
            currentEnvironment = pendingResolves.remove();
            stagedFutures.put(currentEnvironment, new ArrayList<>());
            pendingGets = new ArrayDeque<>();
            for (RunnerApi.ArtifactInformation artifact : responseWrapper.getResolveArtifactResponse().getReplacementsList()) {
              Optional<RunnerApi.ArtifactInformation> fetched = getLocal();
              if (fetched.isPresent()) {
                stagedFutures.get(currentEnvironment).add(CompletableFuture.completedFuture(fetched.get()));
              } else {
                pendingGets.add(artifact);
                responseObserver.onNext(ArtifactApi.ArtifactRequestWrapper.newBuilder().setGetArtifact(ArtifactApi.GetArtifactRequest.newBuilder().setArtifact(artifact)).build());
              }
            }
            // Log the environment being fetched; it has already been removed from
            // pendingResolves, so peek() would name the next one (or null).
            LOG.info("Getting {} artifacts for {}.{}.", pendingGets.size(), stagingToken, currentEnvironment);
            if (pendingGets.isEmpty()) {
              resolveNextEnvironment(responseObserver);
            } else {
              state = State.GET;
            }
            break;
          }
        case GET:
          RunnerApi.ArtifactInformation currentArtifact = pendingGets.remove();
          String name = createFilename(currentEnvironment, currentArtifact);
          try {
            LOG.debug("Storing artifacts for {} as {}", stagingToken, name);
            currentOutput = new ArrayBlockingQueue<ByteString>(100);
            stagedFutures.get(currentEnvironment).add(stagingExecutor.submit(new StoreArtifact(stagingToken, name, currentArtifact, currentOutput, totalPendingBytes)));
          } catch (Exception exn) {
            LOG.error("Error submitting.", exn);
            responseObserver.onError(exn);
            // No task was started for this artifact, so do not fall through and queue chunks
            // that nothing would consume.
            state = State.ERROR;
            return;
          }
          state = State.GETCHUNK;
          // fall through: this message also holds the first chunk.
        case GETCHUNK:
          try {
            ByteString chunk = responseWrapper.getGetArtifactResponse().getData();
            if (chunk.size() > 0) {
              // Make sure we don't accidentally send the EOF value.
              totalPendingBytes.aquire(chunk.size());
              currentOutput.put(chunk);
            }
            if (responseWrapper.getIsLast()) {
              // The EOF value.
              currentOutput.put(ByteString.EMPTY);
              if (pendingGets.isEmpty()) {
                resolveNextEnvironment(responseObserver);
              } else {
                state = State.GET;
                LOG.debug("Waiting for {}", pendingGets.peek());
              }
            }
          } catch (Exception exn) {
            LOG.error("Error submitting.", exn);
            onError(exn);
          }
          break;
        default:
          responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Illegal state " + state)));
      }
    }

    /**
     * Sends a resolve request for the next pending environment, or finishes staging when no
     * environment is left.
     */
    private void resolveNextEnvironment(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
      if (pendingResolves.isEmpty()) {
        finishStaging(responseObserver);
      } else {
        state = State.RESOLVE;
        LOG.info("Resolving artifacts for {}.{}.", stagingToken, pendingResolves.peek());
        responseObserver.onNext(ArtifactApi.ArtifactRequestWrapper.newBuilder().setResolveArtifact(ArtifactApi.ResolveArtifactsRequest.newBuilder().addAllArtifacts(toResolve.get(pendingResolves.peek()))).build());
      }
    }

    /**
     * Waits for every staging future, records the results under the staging token and completes
     * the request stream. On failure the error is forwarded to {@code responseObserver}, the
     * executor is stopped and the state becomes ERROR.
     */
    private void finishStaging(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
      LOG.debug("Finishing staging for {}.", stagingToken);
      Map<String, List<RunnerApi.ArtifactInformation>> staged = new HashMap<>();
      try {
        for (Map.Entry<String, List<Future<RunnerApi.ArtifactInformation>>> entry : stagedFutures.entrySet()) {
          List<RunnerApi.ArtifactInformation> envStaged = new ArrayList<>();
          for (Future<RunnerApi.ArtifactInformation> future : entry.getValue()) {
            envStaged.add(future.get());
          }
          staged.put(entry.getKey(), envStaged);
        }
        ArtifactStagingService.this.staged.put(stagingToken, staged);
        stagingExecutor.shutdown();
        state = State.DONE;
        LOG.info("Artifacts fully staged for {}.", stagingToken);
        responseObserver.onCompleted();
      } catch (Exception exn) {
        LOG.error("Error staging artifacts", exn);
        responseObserver.onError(exn);
        // Do not leave the fixed thread pool alive after a failed staging attempt.
        stagingExecutor.shutdownNow();
        state = State.ERROR;
      }
    }

    /**
     * Return an alternative artifact if we do not need to get this over the artifact API, or
     * possibly at all. This implementation always returns an empty Optional.
     */
    private Optional<RunnerApi.ArtifactInformation> getLocal() {
      return Optional.empty();
    }

    /**
     * Attempts to provide a reasonable filename for the artifact. Uniqueness comes from the id
     * drawn from {@code idGenerator}, which is prepended to the name.
     *
     * @param environment the environment id
     * @param artifact the artifact itself
     * @return a name of at most 100 characters of the form {@code <id>-<environment>-<base>}
     */
    private String createFilename(String environment, RunnerApi.ArtifactInformation artifact) {
      String path;
      try {
        if (artifact.getRoleUrn().equals(ArtifactRetrievalService.STAGING_TO_ARTIFACT_URN)) {
          path = RunnerApi.ArtifactStagingToRolePayload.parseFrom(artifact.getRolePayload()).getStagedName();
        } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.FILE_ARTIFACT_URN)) {
          path = RunnerApi.ArtifactFilePayload.parseFrom(artifact.getTypePayload()).getPath();
        } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.URL_ARTIFACT_URN)) {
          path = RunnerApi.ArtifactUrlPayload.parseFrom(artifact.getTypePayload()).getUrl();
        } else {
          path = "artifact";
        }
      } catch (InvalidProtocolBufferException exn) {
        throw new RuntimeException(exn);
      }
      // Limit to the last contiguous run of letters, digits, '_', '.' and '-'. In particular,
      // this will exclude all path separators. The hyphen is placed last in the character class
      // so it is a literal rather than a range operator.
      List<String> components = Splitter.onPattern("[^A-Za-z0-9_.-]").splitToList(path);
      String base = components.get(components.size() - 1);
      return clip(String.format("%s-%s-%s", idGenerator.getId(), clip(environment, 25), base), 100);
    }

    /** Returns {@code s} truncated to at most {@code maxLength} characters. */
    private String clip(String s, int maxLength) {
      return s.length() < maxLength ? s : s.substring(0, maxLength);
    }

    /**
     * Handles a failure of the client stream: stops the staging executor, if one was created, and
     * moves to the ERROR state.
     */
    @Override
    public void onError(Throwable throwable) {
      // The executor only exists once a valid START message has been processed.
      if (stagingExecutor != null) {
        stagingExecutor.shutdownNow();
      }
      LOG.error("Error staging artifacts", throwable);
      state = State.ERROR;
    }

    /** Checks that the client only completes the stream after staging reached DONE. */
    @Override
    public void onCompleted() {
      Preconditions.checkArgument(state == State.DONE);
    }
  };
}
Aggregations