Search in sources :

Example 16 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class ArtifactStagingService method reverseArtifactRetrievalService.

@Override
public StreamObserver<ArtifactApi.ArtifactResponseWrapper> reverseArtifactRetrievalService(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
    return new StreamObserver<ArtifactApi.ArtifactResponseWrapper>() {

        /**
         * The maximum number of parallel threads to use to stage.
         */
        public static final int THREAD_POOL_SIZE = 10;

        /**
         * The maximum number of bytes to buffer across all writes before throttling.
         */
        // 100 MB
        public static final int MAX_PENDING_BYTES = 100 << 20;

        // Supplies the id that prefixes every staged file name built by createFilename.
        IdGenerator idGenerator = IdGenerators.incrementingLongs();

        // Staging token read from the first message received (START state).
        String stagingToken;

        // Artifacts to resolve, keyed by environment id; looked up from toStage by staging token.
        Map<String, List<RunnerApi.ArtifactInformation>> toResolve;

        // Per environment, one future for every artifact that is staged or being staged.
        Map<String, List<Future<RunnerApi.ArtifactInformation>>> stagedFutures;

        // Runs the StoreArtifact tasks; created when the first message is handled.
        ExecutorService stagingExecutor;

        // Acquired with each chunk's size before the chunk is queued; sized by MAX_PENDING_BYTES.
        OverflowingSemaphore totalPendingBytes;

        // Current step of the staging state machine driven by onNext.
        State state = State.START;

        // Environment ids whose artifacts still have to be resolved.
        Queue<String> pendingResolves;

        // Environment whose artifacts are currently being fetched.
        String currentEnvironment;

        // Artifacts that were requested but whose data has not started arriving yet.
        Queue<RunnerApi.ArtifactInformation> pendingGets;

        // Chunks handed to the StoreArtifact task of the current artifact; an empty ByteString is
        // queued as the end-of-file marker.
        BlockingQueue<ByteString> currentOutput;

        /**
         * Advances the staging state machine with the next message received on this stream.
         *
         * <ul>
         *   <li>{@code START}: reads the staging token, looks up the artifacts registered for it
         *       and starts resolving the first environment.
         *   <li>{@code RESOLVE}: for the environment at the head of {@code pendingResolves},
         *       requests every replacement artifact that {@link #getLocal()} does not provide.
         *   <li>{@code GET}: submits a {@code StoreArtifact} task for the next requested artifact
         *       and intentionally falls through to {@code GETCHUNK}.
         *   <li>{@code GETCHUNK}: queues the received bytes for the store task and, on the last
         *       chunk, queues the EOF marker and moves on.
         * </ul>
         *
         * <p>The method is {@code synchronized}, so messages are processed one at a time.
         *
         * @param responseWrapper the message to process
         */
        @Override
        @SuppressFBWarnings(value = "SF_SWITCH_FALLTHROUGH", justification = "fallthrough intended")
        public synchronized void onNext(ArtifactApi.ArtifactResponseWrapper responseWrapper) {
            switch(state) {
                case START:
                    stagingToken = responseWrapper.getStagingToken();
                    LOG.info("Staging artifacts for {}.", stagingToken);
                    toResolve = toStage.get(stagingToken);
                    if (toResolve == null) {
                        responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Unknown staging token " + stagingToken)));
                        return;
                    }
                    stagedFutures = new ConcurrentHashMap<>();
                    pendingResolves = new ArrayDeque<>();
                    pendingResolves.addAll(toResolve.keySet());
                    stagingExecutor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
                    totalPendingBytes = new OverflowingSemaphore(MAX_PENDING_BYTES);
                    resolveNextEnvironment(responseObserver);
                    break;
                case RESOLVE:
                    {
                        currentEnvironment = pendingResolves.remove();
                        stagedFutures.put(currentEnvironment, new ArrayList<>());
                        pendingGets = new ArrayDeque<>();
                        for (RunnerApi.ArtifactInformation artifact : responseWrapper.getResolveArtifactResponse().getReplacementsList()) {
                            Optional<RunnerApi.ArtifactInformation> fetched = getLocal();
                            if (fetched.isPresent()) {
                                stagedFutures.get(currentEnvironment).add(CompletableFuture.completedFuture(fetched.get()));
                            } else {
                                pendingGets.add(artifact);
                                responseObserver.onNext(ArtifactApi.ArtifactRequestWrapper.newBuilder().setGetArtifact(ArtifactApi.GetArtifactRequest.newBuilder().setArtifact(artifact)).build());
                            }
                        }
                        // Log the environment being fetched; the head of pendingResolves is already
                        // the *next* environment (or null) because of the remove() above.
                        LOG.info("Getting {} artifacts for {}.{}.", pendingGets.size(), stagingToken, currentEnvironment);
                        if (pendingGets.isEmpty()) {
                            resolveNextEnvironment(responseObserver);
                        } else {
                            state = State.GET;
                        }
                        break;
                    }
                case GET:
                    RunnerApi.ArtifactInformation currentArtifact = pendingGets.remove();
                    String name = createFilename(currentEnvironment, currentArtifact);
                    try {
                        LOG.debug("Storing artifacts for {} as {}", stagingToken, name);
                        currentOutput = new ArrayBlockingQueue<ByteString>(100);
                        stagedFutures.get(currentEnvironment).add(stagingExecutor.submit(new StoreArtifact(stagingToken, name, currentArtifact, currentOutput, totalPendingBytes)));
                    } catch (Exception exn) {
                        // No store task is running for this artifact, so (presumably) nothing would
                        // drain currentOutput. Abort instead of falling through to GETCHUNK, where
                        // put() could block once the bounded queue fills up.
                        LOG.error("Error submitting.", exn);
                        stagingExecutor.shutdownNow();
                        state = State.ERROR;
                        responseObserver.onError(exn);
                        return;
                    }
                    state = State.GETCHUNK;
                    // Intentional fallthrough: this message already carries the first chunk.
                case GETCHUNK:
                    try {
                        ByteString chunk = responseWrapper.getGetArtifactResponse().getData();
                        if (chunk.size() > 0) {
                            // Make sure we don't accidentally send the EOF value.
                            totalPendingBytes.aquire(chunk.size());
                            currentOutput.put(chunk);
                        }
                        if (responseWrapper.getIsLast()) {
                            // The EOF value.
                            currentOutput.put(ByteString.EMPTY);
                            if (pendingGets.isEmpty()) {
                                resolveNextEnvironment(responseObserver);
                            } else {
                                state = State.GET;
                                LOG.debug("Waiting for {}", pendingGets.peek());
                            }
                        }
                    } catch (Exception exn) {
                        if (exn instanceof InterruptedException) {
                            // Keep the interrupt visible to the calling thread.
                            Thread.currentThread().interrupt();
                        }
                        LOG.error("Error submitting.", exn);
                        onError(exn);
                    }
                    break;
                default:
                    responseObserver.onError(new StatusException(Status.INVALID_ARGUMENT.withDescription("Illegal state " + state)));
            }
        }

        /**
         * Requests resolution of the artifacts of the environment at the head of
         * {@code pendingResolves}, or finishes staging when no environment is left.
         *
         * @param responseObserver the stream on which the resolve request is sent
         */
        private void resolveNextEnvironment(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
            if (pendingResolves.isEmpty()) {
                // Every environment has been handled.
                finishStaging(responseObserver);
                return;
            }
            state = State.RESOLVE;
            LOG.info("Resolving artifacts for {}.{}.", stagingToken, pendingResolves.peek());
            ArtifactApi.ResolveArtifactsRequest.Builder resolveRequest =
                ArtifactApi.ResolveArtifactsRequest.newBuilder().addAllArtifacts(toResolve.get(pendingResolves.peek()));
            responseObserver.onNext(
                ArtifactApi.ArtifactRequestWrapper.newBuilder().setResolveArtifact(resolveRequest).build());
        }

        /**
         * Waits for every staging future, publishes the staged artifacts under the staging token
         * and completes the stream.
         *
         * <p>If anything fails, the error is reported on {@code responseObserver}, the state
         * becomes {@code ERROR} and the staging executor is stopped so its threads are not leaked.
         *
         * @param responseObserver the stream to complete, or to fail on error
         */
        private void finishStaging(StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
            LOG.debug("Finishing staging for {}.", stagingToken);
            Map<String, List<RunnerApi.ArtifactInformation>> staged = new HashMap<>();
            try {
                for (Map.Entry<String, List<Future<RunnerApi.ArtifactInformation>>> entry : stagedFutures.entrySet()) {
                    List<RunnerApi.ArtifactInformation> envStaged = new ArrayList<>();
                    for (Future<RunnerApi.ArtifactInformation> future : entry.getValue()) {
                        // Blocks until the artifact has been stored.
                        envStaged.add(future.get());
                    }
                    staged.put(entry.getKey(), envStaged);
                }
                ArtifactStagingService.this.staged.put(stagingToken, staged);
                stagingExecutor.shutdown();
                state = State.DONE;
                LOG.info("Artifacts fully staged for {}.", stagingToken);
                responseObserver.onCompleted();
            } catch (Exception exn) {
                if (exn instanceof InterruptedException) {
                    // Keep the interrupt visible to the calling thread.
                    Thread.currentThread().interrupt();
                }
                LOG.error("Error staging artifacts", exn);
                // The success path shuts the executor down; do the same on failure so the pool
                // threads do not outlive the failed staging session.
                stagingExecutor.shutdownNow();
                responseObserver.onError(exn);
                state = State.ERROR;
            }
        }

        /**
         * Hook for supplying a substitute artifact that makes fetching over the artifact API
         * unnecessary. This implementation never supplies one.
         *
         * @return always an empty {@link Optional}
         */
        private Optional<RunnerApi.ArtifactInformation> getLocal() {
            Optional<RunnerApi.ArtifactInformation> noLocalArtifact = Optional.empty();
            return noLocalArtifact;
        }

        /**
         * Attempts to provide a reasonable filename for the artifact.
         *
         * <p>The name has the form {@code <id>-<environment>-<base>}, where {@code id} comes from
         * {@code idGenerator}, the environment id is clipped to 25 characters, {@code base} is the
         * trailing filename-safe part of the artifact's staged name, file path or URL, and the
         * whole name is clipped to 100 characters.
         *
         * @param environment the environment id
         * @param artifact the artifact itself
         * @return the filename to store the artifact under
         * @throws RuntimeException if the artifact's role or type payload cannot be parsed
         */
        private String createFilename(String environment, RunnerApi.ArtifactInformation artifact) {
            String path;
            try {
                if (artifact.getRoleUrn().equals(ArtifactRetrievalService.STAGING_TO_ARTIFACT_URN)) {
                    path = RunnerApi.ArtifactStagingToRolePayload.parseFrom(artifact.getRolePayload()).getStagedName();
                } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.FILE_ARTIFACT_URN)) {
                    path = RunnerApi.ArtifactFilePayload.parseFrom(artifact.getTypePayload()).getPath();
                } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.URL_ARTIFACT_URN)) {
                    path = RunnerApi.ArtifactUrlPayload.parseFrom(artifact.getTypePayload()).getUrl();
                } else {
                    path = "artifact";
                }
            } catch (InvalidProtocolBufferException exn) {
                throw new RuntimeException(exn);
            }
            // Limit to the last contiguous run of alpha-numeric characters, '-', '_' and '.'. In
            // particular, this excludes all path separators. (The previous pattern carried a stray
            // ']' and lacked digits, so it practically never split the path.)
            List<String> components = Splitter.onPattern("[^A-Za-z0-9_.-]").splitToList(path);
            String base = components.get(components.size() - 1);
            return clip(String.format("%s-%s-%s", idGenerator.getId(), clip(environment, 25), base), 100);
        }

        /**
         * Truncates {@code s} to at most {@code maxLength} characters.
         *
         * @param s the string to shorten
         * @param maxLength the maximum number of characters to keep
         * @return {@code s} itself if it is short enough, otherwise its first {@code maxLength}
         *     characters
         */
        private String clip(String s, int maxLength) {
            if (s.length() >= maxLength) {
                return s.substring(0, maxLength);
            }
            return s;
        }

        /**
         * Handles a failure of the incoming stream: stops any staging work in progress, logs the
         * failure and marks this session as failed.
         *
         * @param throwable the failure reported on the stream
         */
        @Override
        public void onError(Throwable throwable) {
            // The executor is only created once the first message has been handled by onNext, so
            // it is still null when the stream fails before that.
            if (stagingExecutor != null) {
                stagingExecutor.shutdownNow();
            }
            LOG.error("Error staging artifacts", throwable);
            state = State.ERROR;
        }

        /**
         * Called when the incoming stream is closed; verifies that staging reached the
         * {@code DONE} state.
         *
         * @throws IllegalArgumentException if the stream completes in any other state
         */
        @Override
        public void onCompleted() {
            boolean stagingFinished = state == State.DONE;
            Preconditions.checkArgument(stagingFinished);
        }
    };
}
Also used : ArtifactApi(org.apache.beam.model.jobmanagement.v1.ArtifactApi) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StatusException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusException) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BlockingQueue(java.util.concurrent.BlockingQueue) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Queue(java.util.Queue) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) BlockingQueue(java.util.concurrent.BlockingQueue) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Optional(java.util.Optional) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IdGenerator(org.apache.beam.sdk.fn.IdGenerator) ArrayDeque(java.util.ArrayDeque) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StatusException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusException) ExecutorService(java.util.concurrent.ExecutorService) CompletableFuture(java.util.concurrent.CompletableFuture) Future(java.util.concurrent.Future) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 17 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class DefaultJobBundleFactory method createEnvironmentCaches.

/**
 * Builds {@code count} environment caches, each paired with the lock that guards reference
 * counting on its clients.
 *
 * <p>When {@code environmentExpirationMillis} is positive, entries expire that long after being
 * written and a fair {@link ReentrantLock} is used; otherwise a no-op lock is used and no
 * expiration is configured.
 *
 * @param serverInfoCreator creates the {@code ServerInfo} for a newly loaded environment
 * @param count the number of caches to create
 * @return the caches together with their locks
 */
private ImmutableList<EnvironmentCacheAndLock> createEnvironmentCaches(ThrowingFunction<ServerFactory, ServerInfo> serverInfoCreator, int count) {
    ImmutableList.Builder<EnvironmentCacheAndLock> cachesWithLocks = ImmutableList.builder();
    for (int cacheIndex = 0; cacheIndex < count; cacheIndex++) {
        // The lock ensures there is no race condition between expiring an environment and a
        // client that is still about to reference it; without expiration no locking is needed.
        final Lock refLock = environmentExpirationMillis > 0 ? new ReentrantLock(true) : NoopLock.get();

        CacheBuilder<Environment, WrappedSdkHarnessClient> cacheBuilder = CacheBuilder.newBuilder().removalListener(removal -> {
            WrappedSdkHarnessClient evicted = removal.getValue();
            final int remainingRefs;
            // Drop the cache's reference under the lock, so an environment is not released while
            // a StageBundleFactory has retrieved it but not yet called ref() on it.
            refLock.lock();
            try {
                remainingRefs = evicted.unref();
            } finally {
                refLock.unlock();
            }
            if (remainingRefs <= 0) {
                return;
            }
            LOG.warn("Expiring environment {} with {} remaining bundle references. Taking note to clean it up during shutdown if the references are not removed by then.", removal.getKey(), remainingRefs);
            evictedActiveClients.add(evicted);
        });
        if (environmentExpirationMillis > 0) {
            cacheBuilder.expireAfterWrite(environmentExpirationMillis, TimeUnit.MILLISECONDS);
        }

        CacheLoader<Environment, WrappedSdkHarnessClient> clientLoader = new CacheLoader<Environment, WrappedSdkHarnessClient>() {

            @Override
            public WrappedSdkHarnessClient load(Environment environment) throws Exception {
                EnvironmentFactory.Provider provider = environmentFactoryProviderMap.get(environment.getUrn());
                ServerInfo servers = serverInfoCreator.apply(provider.getServerFactory());
                String workerId = stageIdGenerator.getId();
                // Register the environment under the worker id before creating it.
                servers.getProvisioningServer().getService().registerEnvironment(workerId, environment);
                EnvironmentFactory factory = provider.createEnvironmentFactory(
                    servers.getControlServer(),
                    servers.getLoggingServer(),
                    servers.getRetrievalServer(),
                    servers.getProvisioningServer(),
                    clientPool,
                    stageIdGenerator);
                return WrappedSdkHarnessClient.wrapping(factory.createEnvironment(environment, workerId), servers);
            }
        };
        LoadingCache<Environment, WrappedSdkHarnessClient> environmentCache = cacheBuilder.build(clientLoader);
        cachesWithLocks.add(new EnvironmentCacheAndLock(environmentCache, refLock));
    }
    return cachesWithLocks.build();
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ServerFactory(org.apache.beam.sdk.fn.server.ServerFactory) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) GrpcContextHeaderAccessorProvider(org.apache.beam.sdk.fn.server.GrpcContextHeaderAccessorProvider) EmbeddedEnvironmentFactory(org.apache.beam.runners.fnexecution.environment.EmbeddedEnvironmentFactory) ExternalEnvironmentFactory(org.apache.beam.runners.fnexecution.environment.ExternalEnvironmentFactory) ProcessEnvironmentFactory(org.apache.beam.runners.fnexecution.environment.ProcessEnvironmentFactory) EnvironmentFactory(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory) DockerEnvironmentFactory(org.apache.beam.runners.fnexecution.environment.DockerEnvironmentFactory) RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Example 18 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class ExecutableStage method fromPayload.

/**
 * Rebuilds an {@link ExecutableStage} from its {@link ExecutableStagePayload} representation.
 *
 * <p>Every id listed in the payload (input, side inputs, user states, timers, transforms and
 * outputs) is looked up in the payload's own {@link Components}, so the payload alone is enough
 * to reconstruct the stage.
 *
 * <p>See {@link #toPTransform} for how the payload is constructed.
 *
 * @param payload the serialized form of the stage
 * @return the stage described by {@code payload}
 */
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
    final Components stageComponents = payload.getComponents();
    final Environment stageEnvironment = payload.getEnvironment();
    final Collection<WireCoderSetting> coderSettings = payload.getWireCoderSettingsList();

    final String inputId = payload.getInput();
    final PCollectionNode inputNode = PipelineNode.pCollection(inputId, stageComponents.getPcollectionsOrThrow(inputId));

    final List<SideInputReference> sideInputRefs =
        payload.getSideInputsList().stream()
            .map(ref -> SideInputReference.fromSideInputId(ref, stageComponents))
            .collect(Collectors.toList());
    final List<UserStateReference> userStateRefs =
        payload.getUserStatesList().stream()
            .map(ref -> UserStateReference.fromUserStateId(ref, stageComponents))
            .collect(Collectors.toList());
    final List<TimerReference> timerRefs =
        payload.getTimersList().stream()
            .map(ref -> TimerReference.fromTimerId(ref, stageComponents))
            .collect(Collectors.toList());
    final List<PTransformNode> transformNodes =
        payload.getTransformsList().stream()
            .map(transformId -> PipelineNode.pTransform(transformId, stageComponents.getTransformsOrThrow(transformId)))
            .collect(Collectors.toList());
    final List<PCollectionNode> outputNodes =
        payload.getOutputsList().stream()
            .map(outputId -> PipelineNode.pCollection(outputId, stageComponents.getPcollectionsOrThrow(outputId)))
            .collect(Collectors.toList());

    return ImmutableExecutableStage.of(
        stageComponents,
        stageEnvironment,
        inputNode,
        sideInputRefs,
        userStateRefs,
        timerRefs,
        transformNodes,
        outputNodes,
        coderSettings);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Collection(java.util.Collection) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) Collectors(java.util.stream.Collectors) UserStateId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.UserStateId) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) List(java.util.List) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) TimerId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.TimerId) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Collections(java.util.Collections) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Example 19 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class Environments method getEnvironment.

/**
 * Looks up the environment that the given transform refers to.
 *
 * @param ptransformId id of the transform within {@code components}
 * @param components the components containing the transform and its environment
 * @return the referenced environment, or an empty {@link Optional} when the transform has no
 *     environment id
 */
public static Optional<Environment> getEnvironment(String ptransformId, Components components) {
    String environmentId = components.getTransformsOrThrow(ptransformId).getEnvironmentId();
    if (!Strings.isNullOrEmpty(environmentId)) {
        return Optional.of(components.getEnvironmentsOrThrow(environmentId));
    }
    // The transform does not name an environment, so there is nothing to look up.
    return Optional.empty();
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 20 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class SdkComponents method getEnvironmentIdFor.

/**
 * Returns the id of the environment to use for the given resource hints, registering a new
 * environment on first use when the hints are non-empty.
 *
 * <p>Empty hints map to the only registered environment. Otherwise a copy of that environment
 * carrying the hints is registered under a unique name derived from its URN. The result is
 * remembered per {@code resourceHints}.
 *
 * @param resourceHints the hints the environment must carry
 * @return the id of the matching environment
 */
public String getEnvironmentIdFor(ResourceHints resourceHints) {
    if (environmentIdsByResourceHints.containsKey(resourceHints)) {
        // Already decided on an earlier call.
        return environmentIdsByResourceHints.get(resourceHints);
    }
    String defaultEnvironmentId = getOnlyEnvironmentId();
    if (resourceHints.hints().isEmpty()) {
        environmentIdsByResourceHints.put(resourceHints, defaultEnvironmentId);
    } else {
        Environment defaultEnvironment = componentsBuilder.getEnvironmentsMap().get(defaultEnvironmentId);
        Environment hintedEnvironment =
            defaultEnvironment.toBuilder()
                .putAllResourceHints(Maps.transformValues(resourceHints.hints(), hint -> ByteString.copyFrom(hint.toBytes())))
                .build();
        String hintedId = uniqify(hintedEnvironment.getUrn(), environmentIds.values());
        environmentIds.put(hintedEnvironment, hintedId);
        componentsBuilder.putEnvironments(hintedId, hintedEnvironment);
        environmentIdsByResourceHints.put(resourceHints, hintedId);
    }
    return environmentIdsByResourceHints.get(resourceHints);
}
Also used : Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)

Aggregations

Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)33 Test (org.junit.Test)28 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)17 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)14 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)13 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)12 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)11 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 Map (java.util.Map)7 RemoteEnvironment (org.apache.beam.runners.fnexecution.environment.RemoteEnvironment)7 Pipeline (org.apache.beam.sdk.Pipeline)7 IOException (java.io.IOException)6 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)5 EnvironmentFactory (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory)5 Provider (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider)5 ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 ArrayList (java.util.ArrayList)4