use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class ArtifactStagingService method reverseArtifactRetrievalService.
/**
 * Handles one "reverse" artifact retrieval stream: this service sends resolve/get requests on
 * {@code responseObserver} and the returned observer consumes the peer's responses, handing the
 * received bytes to {@code StoreArtifact} tasks.
 *
 * <p>The returned observer is a small state machine:
 *
 * <ul>
 *   <li>{@code START}: reads the staging token, looks up what has to be staged and requests
 *       resolution of the first environment's artifacts.
 *   <li>{@code RESOLVE}: receives the resolved artifacts of one environment and requests each one
 *       that is not available locally.
 *   <li>{@code GET}: first chunk of an artifact; submits a {@code StoreArtifact} task and then
 *       falls through to {@code GETCHUNK}.
 *   <li>{@code GETCHUNK}: forwards a chunk to the running store task; the last chunk moves on to
 *       the next artifact or the next environment.
 * </ul>
 *
 * @param responseObserver the stream on which requests are sent to the peer
 * @return the observer that consumes the peer's responses
 */
@Override
public StreamObserver<ArtifactApi.ArtifactResponseWrapper> reverseArtifactRetrievalService(
    StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
  return new StreamObserver<ArtifactApi.ArtifactResponseWrapper>() {
    /** The maximum number of parallel threads to use to stage. */
    public static final int THREAD_POOL_SIZE = 10;

    /** The maximum number of bytes to buffer across all writes before throttling (100 MB). */
    public static final int MAX_PENDING_BYTES = 100 << 20;

    // Supplies the unique prefix of every staged file name.
    IdGenerator idGenerator = IdGenerators.incrementingLongs();
    String stagingToken;
    // Environment id -> artifacts that still have to be resolved for it.
    Map<String, List<RunnerApi.ArtifactInformation>> toResolve;
    // Environment id -> one future per artifact being (or already) staged.
    Map<String, List<Future<RunnerApi.ArtifactInformation>>> stagedFutures;
    // Null until the START message has been processed.
    ExecutorService stagingExecutor;
    OverflowingSemaphore totalPendingBytes;
    State state = State.START;
    Queue<String> pendingResolves;
    String currentEnvironment;
    Queue<RunnerApi.ArtifactInformation> pendingGets;
    // Chunks of the artifact currently being received; an empty ByteString marks EOF.
    BlockingQueue<ByteString> currentOutput;

    /**
     * Advances the state machine with one response from the peer. The method is synchronized,
     * so at most one call at a time mutates the staging state.
     */
    @Override
    @SuppressFBWarnings(value = "SF_SWITCH_FALLTHROUGH", justification = "fallthrough intended")
    public synchronized void onNext(ArtifactApi.ArtifactResponseWrapper responseWrapper) {
      switch (state) {
        case START:
          stagingToken = responseWrapper.getStagingToken();
          LOG.info("Staging artifacts for {}.", stagingToken);
          toResolve = toStage.get(stagingToken);
          if (toResolve == null) {
            responseObserver.onError(
                new StatusException(
                    Status.INVALID_ARGUMENT.withDescription(
                        "Unknown staging token " + stagingToken)));
            return;
          }
          stagedFutures = new ConcurrentHashMap<>();
          pendingResolves = new ArrayDeque<>();
          pendingResolves.addAll(toResolve.keySet());
          stagingExecutor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
          totalPendingBytes = new OverflowingSemaphore(MAX_PENDING_BYTES);
          resolveNextEnvironment(responseObserver);
          break;
        case RESOLVE:
          {
            currentEnvironment = pendingResolves.remove();
            stagedFutures.put(currentEnvironment, new ArrayList<>());
            pendingGets = new ArrayDeque<>();
            for (RunnerApi.ArtifactInformation artifact :
                responseWrapper.getResolveArtifactResponse().getReplacementsList()) {
              Optional<RunnerApi.ArtifactInformation> fetched = getLocal();
              if (fetched.isPresent()) {
                stagedFutures
                    .get(currentEnvironment)
                    .add(CompletableFuture.completedFuture(fetched.get()));
              } else {
                pendingGets.add(artifact);
                responseObserver.onNext(
                    ArtifactApi.ArtifactRequestWrapper.newBuilder()
                        .setGetArtifact(
                            ArtifactApi.GetArtifactRequest.newBuilder().setArtifact(artifact))
                        .build());
              }
            }
            // Log the environment being fetched; the head of pendingResolves is already the
            // next one because of the remove() above.
            LOG.info(
                "Getting {} artifacts for {}.{}.",
                pendingGets.size(),
                stagingToken,
                currentEnvironment);
            if (pendingGets.isEmpty()) {
              resolveNextEnvironment(responseObserver);
            } else {
              state = State.GET;
            }
            break;
          }
        case GET:
          RunnerApi.ArtifactInformation currentArtifact = pendingGets.remove();
          String name = createFilename(currentEnvironment, currentArtifact);
          try {
            LOG.debug("Storing artifacts for {} as {}", stagingToken, name);
            currentOutput = new ArrayBlockingQueue<ByteString>(100);
            stagedFutures
                .get(currentEnvironment)
                .add(
                    stagingExecutor.submit(
                        new StoreArtifact(
                            stagingToken, name, currentArtifact, currentOutput, totalPendingBytes)));
          } catch (Exception exn) {
            LOG.error("Error submitting.", exn);
            responseObserver.onError(exn);
            // No task will ever drain currentOutput, so do not fall through and feed it.
            stagingExecutor.shutdownNow();
            state = State.ERROR;
            return;
          }
          state = State.GETCHUNK;
          // Intentional fall through: the message that starts an artifact also carries data.
        case GETCHUNK:
          try {
            ByteString chunk = responseWrapper.getGetArtifactResponse().getData();
            if (chunk.size() > 0) {
              // Make sure we don't accidentally send the EOF value.
              totalPendingBytes.aquire(chunk.size());
              currentOutput.put(chunk);
            }
            if (responseWrapper.getIsLast()) {
              // The EOF value.
              currentOutput.put(ByteString.EMPTY);
              if (pendingGets.isEmpty()) {
                resolveNextEnvironment(responseObserver);
              } else {
                state = State.GET;
                LOG.debug("Waiting for {}", pendingGets.peek());
              }
            }
          } catch (Exception exn) {
            LOG.error("Error submitting.", exn);
            onError(exn);
          }
          break;
        default:
          responseObserver.onError(
              new StatusException(
                  Status.INVALID_ARGUMENT.withDescription("Illegal state " + state)));
      }
    }

    /**
     * Requests resolution of the next pending environment's artifacts, or finishes staging when
     * no environment is left.
     */
    private void resolveNextEnvironment(
        StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
      if (pendingResolves.isEmpty()) {
        finishStaging(responseObserver);
      } else {
        state = State.RESOLVE;
        LOG.info("Resolving artifacts for {}.{}.", stagingToken, pendingResolves.peek());
        responseObserver.onNext(
            ArtifactApi.ArtifactRequestWrapper.newBuilder()
                .setResolveArtifact(
                    ArtifactApi.ResolveArtifactsRequest.newBuilder()
                        .addAllArtifacts(toResolve.get(pendingResolves.peek())))
                .build());
      }
    }

    /**
     * Waits for every staging future, records the results under the staging token and completes
     * the request stream. Any failure is reported on {@code responseObserver} instead.
     */
    private void finishStaging(
        StreamObserver<ArtifactApi.ArtifactRequestWrapper> responseObserver) {
      LOG.debug("Finishing staging for {}.", stagingToken);
      Map<String, List<RunnerApi.ArtifactInformation>> staged = new HashMap<>();
      try {
        for (Map.Entry<String, List<Future<RunnerApi.ArtifactInformation>>> entry :
            stagedFutures.entrySet()) {
          List<RunnerApi.ArtifactInformation> envStaged = new ArrayList<>();
          for (Future<RunnerApi.ArtifactInformation> future : entry.getValue()) {
            envStaged.add(future.get());
          }
          staged.put(entry.getKey(), envStaged);
        }
        ArtifactStagingService.this.staged.put(stagingToken, staged);
        stagingExecutor.shutdown();
        state = State.DONE;
        LOG.info("Artifacts fully staged for {}.", stagingToken);
        responseObserver.onCompleted();
      } catch (Exception exn) {
        LOG.error("Error staging artifacts", exn);
        // Release the pool's threads on failure as well; the success path shuts it down above.
        stagingExecutor.shutdownNow();
        responseObserver.onError(exn);
        state = State.ERROR;
      }
    }

    /**
     * Return an alternative artifact if we do not need to get this over the artifact API, or
     * possibly at all. Currently always empty, so every artifact is requested from the peer.
     */
    private Optional<RunnerApi.ArtifactInformation> getLocal() {
      return Optional.empty();
    }

    /**
     * Attempts to provide a reasonable filename for the artifact. The name has the form
     * {@code <unique id>-<environment, at most 25 chars>-<base>} and is clipped to 100
     * characters; uniqueness comes from the generated id prefix.
     *
     * @param environment the environment id
     * @param artifact the artifact itself
     * @return the file name to stage the artifact under
     * @throws RuntimeException if the artifact's role or type payload cannot be parsed
     */
    private String createFilename(String environment, RunnerApi.ArtifactInformation artifact) {
      String path;
      try {
        if (artifact.getRoleUrn().equals(ArtifactRetrievalService.STAGING_TO_ARTIFACT_URN)) {
          path =
              RunnerApi.ArtifactStagingToRolePayload.parseFrom(artifact.getRolePayload())
                  .getStagedName();
        } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.FILE_ARTIFACT_URN)) {
          path = RunnerApi.ArtifactFilePayload.parseFrom(artifact.getTypePayload()).getPath();
        } else if (artifact.getTypeUrn().equals(ArtifactRetrievalService.URL_ARTIFACT_URN)) {
          path = RunnerApi.ArtifactUrlPayload.parseFrom(artifact.getTypePayload()).getUrl();
        } else {
          path = "artifact";
        }
      } catch (InvalidProtocolBufferException exn) {
        throw new RuntimeException(exn);
      }
      // Limit to the last contiguous run of letters, digits, '_', '.' and '-'. In particular,
      // this excludes all path separators. (The previous pattern had a stray ']' and therefore
      // almost never split anything.)
      List<String> components = Splitter.onPattern("[^A-Za-z0-9_.-]").splitToList(path);
      String base = components.get(components.size() - 1);
      return clip(
          String.format("%s-%s-%s", idGenerator.getId(), clip(environment, 25), base), 100);
    }

    /** Returns {@code s} shortened to at most {@code maxLength} characters. */
    private String clip(String s, int maxLength) {
      return s.length() < maxLength ? s : s.substring(0, maxLength);
    }

    /** Stops any staging work in progress and marks this stream as failed. */
    @Override
    public void onError(Throwable throwable) {
      // The executor only exists once the START message has been handled; the stream may fail
      // before that.
      if (stagingExecutor != null) {
        stagingExecutor.shutdownNow();
      }
      LOG.error("Error staging artifacts", throwable);
      state = State.ERROR;
    }

    /** Checks that the peer closes the stream only after staging has reached {@code DONE}. */
    @Override
    public void onCompleted() {
      Preconditions.checkArgument(state == State.DONE);
    }
  };
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class DefaultJobBundleFactory method createEnvironmentCaches.
/**
 * Builds {@code count} environment caches, each paired with the lock that guards reference
 * counting of its cached clients.
 *
 * @param serverInfoCreator creates the {@link ServerInfo} for a newly loaded environment
 * @param count the number of caches to create
 * @return the caches together with their locks
 */
private ImmutableList<EnvironmentCacheAndLock> createEnvironmentCaches(
    ThrowingFunction<ServerFactory, ServerInfo> serverInfoCreator, int count) {
  ImmutableList.Builder<EnvironmentCacheAndLock> result = ImmutableList.builder();
  for (int cacheIndex = 0; cacheIndex < count; cacheIndex++) {
    final boolean expiresEnvironments = environmentExpirationMillis > 0;
    // A real (fair) lock is only needed when environments can expire: it prevents a race
    // between expiring an environment and a client that is still about to reference it.
    final Lock refLock = expiresEnvironments ? new ReentrantLock(true) : NoopLock.get();

    CacheBuilder<Environment, WrappedSdkHarnessClient> builder =
        CacheBuilder.newBuilder()
            .removalListener(
                notification -> {
                  WrappedSdkHarnessClient evicted = notification.getValue();
                  final int remainingRefs;
                  // Drop the cache's reference under the lock, so that an environment is not
                  // removed after a StageBundleFactory has retrieved it but before that
                  // factory has issued ref() on it.
                  refLock.lock();
                  try {
                    remainingRefs = evicted.unref();
                  } finally {
                    refLock.unlock();
                  }
                  if (remainingRefs <= 0) {
                    return;
                  }
                  LOG.warn("Expiring environment {} with {} remaining bundle references. Taking note to clean it up during shutdown if the references are not removed by then.", notification.getKey(), remainingRefs);
                  evictedActiveClients.add(evicted);
                });
    if (expiresEnvironments) {
      builder.expireAfterWrite(environmentExpirationMillis, TimeUnit.MILLISECONDS);
    }

    LoadingCache<Environment, WrappedSdkHarnessClient> environmentCache =
        builder.build(
            new CacheLoader<Environment, WrappedSdkHarnessClient>() {
              @Override
              public WrappedSdkHarnessClient load(Environment environment) throws Exception {
                EnvironmentFactory.Provider provider =
                    environmentFactoryProviderMap.get(environment.getUrn());
                ServerFactory serverFactory = provider.getServerFactory();
                ServerInfo servers = serverInfoCreator.apply(serverFactory);
                String workerId = stageIdGenerator.getId();
                // Register the environment under the worker id before it is created.
                servers.getProvisioningServer().getService().registerEnvironment(workerId, environment);
                EnvironmentFactory factory =
                    provider.createEnvironmentFactory(
                        servers.getControlServer(),
                        servers.getLoggingServer(),
                        servers.getRetrievalServer(),
                        servers.getProvisioningServer(),
                        clientPool,
                        stageIdGenerator);
                return WrappedSdkHarnessClient.wrapping(
                    factory.createEnvironment(environment, workerId), servers);
              }
            });
    result.add(new EnvironmentCacheAndLock(environmentCache, refLock));
  }
  return result.build();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class ExecutableStage method fromPayload.
/**
 * Rebuilds an {@link ExecutableStage} from its {@link ExecutableStagePayload} form.
 *
 * <p>See {@link #toPTransform} for how the payload is constructed.
 *
 * <p>Note: part of the payload duplicates information held by the {@link PTransform} that
 * carries it. The {@link ExecutableStagePayload} should be sufficiently rich to construct a
 * {@code ProcessBundleDescriptor} using only the payload.
 *
 * @param payload the serialized stage
 * @return the stage described by {@code payload}, with every id looked up in the payload's own
 *     components
 */
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
  final Components stageComponents = payload.getComponents();

  final String inputId = payload.getInput();
  PCollectionNode inputNode =
      PipelineNode.pCollection(inputId, stageComponents.getPcollectionsOrThrow(inputId));

  List<SideInputReference> sideInputRefs =
      payload.getSideInputsList().stream()
          .map(ref -> SideInputReference.fromSideInputId(ref, stageComponents))
          .collect(Collectors.toList());
  List<UserStateReference> userStateRefs =
      payload.getUserStatesList().stream()
          .map(ref -> UserStateReference.fromUserStateId(ref, stageComponents))
          .collect(Collectors.toList());
  List<TimerReference> timerRefs =
      payload.getTimersList().stream()
          .map(ref -> TimerReference.fromTimerId(ref, stageComponents))
          .collect(Collectors.toList());
  List<PTransformNode> transformNodes =
      payload.getTransformsList().stream()
          .map(
              transformId ->
                  PipelineNode.pTransform(
                      transformId, stageComponents.getTransformsOrThrow(transformId)))
          .collect(Collectors.toList());
  List<PCollectionNode> outputNodes =
      payload.getOutputsList().stream()
          .map(
              outputId ->
                  PipelineNode.pCollection(
                      outputId, stageComponents.getPcollectionsOrThrow(outputId)))
          .collect(Collectors.toList());

  return ImmutableExecutableStage.of(
      stageComponents,
      payload.getEnvironment(),
      inputNode,
      sideInputRefs,
      userStateRefs,
      timerRefs,
      transformNodes,
      outputNodes,
      payload.getWireCoderSettingsList());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class Environments method getEnvironment.
/**
 * Looks up the environment of the transform with the given id.
 *
 * @param ptransformId id of the transform within {@code components}
 * @param components the components holding both the transform and its environment
 * @return the transform's environment, or an empty {@link Optional} when the transform has no
 *     environment id
 */
public static Optional<Environment> getEnvironment(String ptransformId, Components components) {
  String environmentId = components.getTransformsOrThrow(ptransformId).getEnvironmentId();
  if (!Strings.isNullOrEmpty(environmentId)) {
    return Optional.of(components.getEnvironmentsOrThrow(environmentId));
  }
  // A transform may carry no environment id (the earlier note here gave a GroupByKeyPayload as
  // an example); that case yields an empty Optional.
  return Optional.empty();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class SdkComponents method getEnvironmentIdFor.
/**
 * Returns the id of the environment to use for the given resource hints, registering a new
 * environment the first time a non-empty set of hints is seen.
 *
 * @param resourceHints the hints the environment must carry
 * @return the only environment's id when there are no hints; otherwise the id of a copy of that
 *     environment with the hints added
 */
public String getEnvironmentIdFor(ResourceHints resourceHints) {
  // Fast path: these hints have been mapped to an environment before.
  if (environmentIdsByResourceHints.containsKey(resourceHints)) {
    return environmentIdsByResourceHints.get(resourceHints);
  }

  String baseEnvironmentId = getOnlyEnvironmentId();
  if (resourceHints.hints().isEmpty()) {
    // Nothing to add, so the base environment itself is used.
    environmentIdsByResourceHints.put(resourceHints, baseEnvironmentId);
    return environmentIdsByResourceHints.get(resourceHints);
  }

  // Derive a new environment from the base one, with each hint stored in its byte form.
  Environment hintedEnvironment =
      componentsBuilder
          .getEnvironmentsMap()
          .get(baseEnvironmentId)
          .toBuilder()
          .putAllResourceHints(
              Maps.transformValues(
                  resourceHints.hints(), hint -> ByteString.copyFrom(hint.toBytes())))
          .build();
  String hintedEnvironmentId = uniqify(hintedEnvironment.getUrn(), environmentIds.values());
  environmentIds.put(hintedEnvironment, hintedEnvironmentId);
  componentsBuilder.putEnvironments(hintedEnvironmentId, hintedEnvironment);
  environmentIdsByResourceHints.put(resourceHints, hintedEnvironmentId);
  return environmentIdsByResourceHints.get(resourceHints);
}
Aggregations