Search in sources :

Example 1 with ProvisionerContext

use of io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext in project cdap by caskdata.

the class ProvisioningService method createProvisionTask.

/**
 * Builds the {@link Runnable} that starts provisioning for the given task.
 *
 * <p>A {@link ProvisionerContext} is created first. If that fails, either with an {@link IOException}
 * while setting up the SSH context or with an {@link InvalidMacroException}, the failure is logged via
 * {@code runWithProgramLogging}, an error is written through {@code programStateWriter}, a deprovisioning
 * notification is sent, and a no-op runnable is returned.
 *
 * @param taskInfo information about the provisioning task (program run, options, user, properties)
 * @param provisioner the provisioner that the created {@link ProvisionTask} will use
 * @return a runnable that submits the provision task to {@code taskExecutor}, or a no-op runnable when the
 *         provisioner context could not be created
 */
private Runnable createProvisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner) {
    ProgramRunId programRunId = taskInfo.getProgramRunId();
    ProgramOptions programOptions = taskInfo.getProgramOptions();
    Map<String, String> systemArgs = programOptions.getArguments().asMap();
    ProvisionerContext context;
    try {
        SSHContext sshContext = new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS), locationFactory.create(taskInfo.getSecureKeysDir()), createSSHKeyPair(taskInfo));
        context = createContext(cConf, programOptions, programRunId, taskInfo.getUser(), taskInfo.getProvisionerProperties(), sshContext);
    } catch (IOException e) {
        runWithProgramLogging(programRunId, systemArgs, () -> LOG.error("Failed to load ssh key. The run will be marked as failed.", e));
        programStateWriter.error(programRunId, new IllegalStateException("Failed to load ssh key.", e));
        provisionerNotifier.deprovisioning(programRunId);
        // Nothing to provision; hand back a runnable that does nothing.
        return () -> {
        };
    } catch (InvalidMacroException e) {
        runWithProgramLogging(programRunId, systemArgs, () -> LOG.error("Could not evaluate macros while provisioning. " + "The run will be marked as failed.", e));
        programStateWriter.error(programRunId, new IllegalStateException("Could not evaluate macros while provisioning", e));
        provisionerNotifier.deprovisioning(programRunId);
        // Nothing to provision; hand back a runnable that does nothing.
        return () -> {
        };
    }
    // TODO: (CDAP-13246) pick up timeout from profile instead of hardcoding
    ProvisioningTask task = new ProvisionTask(taskInfo, transactionRunner, provisioner, context, provisionerNotifier, programStateWriter, 300);
    ProvisioningTaskKey taskKey = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.PROVISION);
    return () -> taskExecutor.submit(taskKey, () -> callWithProgramLogging(programRunId, systemArgs, () -> {
        try {
            return task.executeOnce();
        } catch (InterruptedException e) {
            // Log and rethrow unchanged so the caller still sees the interruption.
            LOG.debug("Provision task for program run {} interrupted.", programRunId);
            throw e;
        } catch (Exception e) {
            LOG.info("Provision task for program run {} failed.", programRunId, e);
            throw e;
        }
    }));
}
Also used : SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) ProvisioningTask(io.cdap.cdap.internal.provision.task.ProvisioningTask) IOException(java.io.IOException) ProvisionTask(io.cdap.cdap.internal.provision.task.ProvisionTask) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) NotFoundException(io.cdap.cdap.common.NotFoundException) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId)

Example 2 with ProvisionerContext

use of io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext in project cdap by caskdata.

the class ProvisioningService method getRuntimeJobManager.

/**
 * Asks the provisioner configured for the run's profile for its runtime job manager.
 *
 * <p>The provisioner name and the profile properties are read from the system arguments of the given
 * program options; the user id is read from the {@code USER_ID} option. The provisioner context is
 * created without an SSH context.
 *
 * @param programRunId program run the provisioner context is created for
 * @param programOptions program options supplying the system arguments
 * @return the optional runtime job manager returned by the provisioner
 */
public Optional<RuntimeJobManager> getRuntimeJobManager(ProgramRunId programRunId, ProgramOptions programOptions) {
    Map<String, String> systemArguments = programOptions.getArguments().asMap();
    String provisionerName = SystemArguments.getProfileProvisioner(systemArguments);
    // NOTE(review): the lookup result is not null-checked here, unlike in getClusterStatus -- confirm that
    // callers only invoke this for profiles with a registered provisioner.
    Provisioner profileProvisioner = provisionerInfo.get().provisioners.get(provisionerName);
    String userId = programOptions.getArguments().getOption(ProgramOptionConstants.USER_ID);
    Map<String, String> profileProperties = SystemArguments.getProfileProperties(systemArguments);
    // The last argument is the SSH context; none is supplied for this lookup.
    ProvisionerContext provisionerContext =
        createContext(cConf, programOptions, programRunId, userId, profileProperties, null);
    return profileProvisioner.getRuntimeJobManager(provisionerContext);
}
Also used : ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) Provisioner(io.cdap.cdap.runtime.spi.provisioner.Provisioner)

Example 3 with ProvisionerContext

use of io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext in project cdap by caskdata.

the class DataprocProvisioner method tryReuseCluster.

/**
 * If cluster reuse is enabled and possible, tries to find a cluster to reuse.
 *
 * <p>The lookup has two phases. First, for idempotency, it checks whether a cluster is already labeled with
 * this run's key; such a cluster is returned if it is creating or running, otherwise removal of its run-key
 * label is attempted. Second, while holding the reuse lock, it searches running clusters whose reuse-until
 * label has not passed and claims one by setting the run-key label and removing the reuse-until label.
 * Any exception during the second phase is logged and results in {@code null}.
 *
 * @param client data proc client
 * @param context provisioner context
 * @param conf dataproc configuration
 * @return a cluster ready to reuse or null if none available.
 * @throws RetryableProvisionException declared by the signature; may be raised by the helpers called here
 * @throws IOException declared by the signature; may be raised by the helpers called here
 */
@Nullable
private Cluster tryReuseCluster(DataprocClient client, ProvisionerContext context, DataprocConf conf) throws RetryableProvisionException, IOException {
    if (!isReuseSupported(conf)) {
        LOG.debug("Not checking cluster reuse, enabled: {}, skip delete: {}, idle ttl: {}, reuse threshold: {}", conf.isClusterReuseEnabled(), conf.isSkipDelete(), conf.getIdleTTLMinutes(), conf.getClusterReuseThresholdMinutes());
        return null;
    }
    String clusterKey = getRunKey(context);
    // For idempotency, check if we already have the cluster allocated
    Optional<Cluster> clusterOptional = findCluster(clusterKey, client);
    if (clusterOptional.isPresent()) {
        Cluster cluster = clusterOptional.get();
        if (cluster.getStatus() == ClusterStatus.CREATING || cluster.getStatus() == ClusterStatus.RUNNING) {
            LOG.debug("Found allocated cluster {}", cluster.getName());
            return cluster;
        } else {
            // The message has a placeholder for the cluster name, so the name must be passed as an argument.
            LOG.debug("Preallocated cluster {} has expired, will find a new one", cluster.getName());
            // Let's remove the reuse label to ensure new cluster will be picked up by findCluster
            try {
                client.updateClusterLabels(cluster.getName(), Collections.emptyMap(), Collections.singleton(LABEL_RUN_KEY));
            } catch (Exception e) {
                // Best effort: the stack trace is only emitted at trace level, a short message otherwise.
                LOG.trace("Unable to remove reuse label, cluster may have died already", e);
                if (!LOG.isTraceEnabled()) {
                    LOG.debug("Unable to remove reuse label, cluster may have died already");
                }
            }
        }
    }
    Lock reuseLock = getSystemContext().getLock(REUSE_LOCK);
    reuseLock.lock();
    try {
        Map<String, String> filter = new HashMap<>();
        String normalizedProfileName = getNormalizedProfileName(context);
        if (normalizedProfileName != null) {
            filter.put(LABEL_PROFILE, normalizedProfileName);
        }
        filter.put(LABEL_VERSON, getVersionLabel());
        filter.put(LABEL_REUSE_KEY, conf.getClusterReuseKey());
        filter.put(LABEL_REUSE_UNTIL, "*");
        Optional<Cluster> cluster = client.getClusters(ClusterStatus.RUNNING, filter, clientCluster -> {
            // Verify reuse label; parseLong yields the primitive directly instead of boxing and unboxing.
            long reuseUntil = Long.parseLong(clientCluster.getLabelsOrDefault(LABEL_REUSE_UNTIL, "0"));
            long now = System.currentTimeMillis();
            if (reuseUntil < now) {
                LOG.debug("Skipping expired cluster {}, reuse until {} is before now {}", clientCluster.getClusterName(), reuseUntil, now);
                return false;
            }
            return true;
        }).findAny();
        if (cluster.isPresent()) {
            String clusterName = cluster.get().getName();
            LOG.info("Found cluster to reuse: {}", clusterName);
            // Add cdap-reuse-for to find cluster later if needed
            // And remove reuseUntil to indicate the cluster is taken
            client.updateClusterLabels(clusterName, Collections.singletonMap(LABEL_RUN_KEY, clusterKey), Collections.singleton(LABEL_REUSE_UNTIL));
        } else {
            LOG.debug("Could not find any available cluster to reuse.");
        }
        return cluster.orElse(null);
    } catch (Exception e) {
        // Reuse is an optimization: on any failure fall back to creating a new cluster.
        LOG.warn("Error retrieving clusters to reuse, will create a new one", e);
        return null;
    } finally {
        reuseLock.unlock();
    }
}
Also used : RuntimeMonitorType(io.cdap.cdap.runtime.spi.RuntimeMonitorType) DataprocUtils(io.cdap.cdap.runtime.spi.common.DataprocUtils) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ClusterStatus(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus) SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) ClusterOperationMetadata(com.google.cloud.dataproc.v1.ClusterOperationMetadata) PollingStrategy(io.cdap.cdap.runtime.spi.provisioner.PollingStrategy) GeneralSecurityException(java.security.GeneralSecurityException) Map(java.util.Map) ProgramRunInfo(io.cdap.cdap.runtime.spi.ProgramRunInfo) Nullable(javax.annotation.Nullable) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) PollingStrategies(io.cdap.cdap.runtime.spi.provisioner.PollingStrategies) ImmutableSet(com.google.common.collect.ImmutableSet) Logger(org.slf4j.Logger) ProvisionerSpecification(io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification) IOException(java.io.IOException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Lock(java.util.concurrent.locks.Lock) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Pattern(java.util.regex.Pattern) Collections(java.util.Collections) HashMap(java.util.HashMap) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) GeneralSecurityException(java.security.GeneralSecurityException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException) Lock(java.util.concurrent.locks.Lock) Nullable(javax.annotation.Nullable)

Example 4 with ProvisionerContext

use of io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext in project cdap by caskdata.

the class ExistingDataprocProvisioner method createCluster.

/**
 * Attaches to an already existing Dataproc cluster instead of creating a new one.
 *
 * <p>When the runtime monitor type is SSH, the SSH user and key are read from the context properties and
 * installed on the SSH context, if one is present. The cluster labels are then updated on a best-effort
 * basis, and the cluster named by the {@code CLUSTER_NAME} property is returned if it is running.
 *
 * @param context the provisioner context supplying properties, monitor type and SSH context
 * @return the running cluster identified by the configured cluster name
 * @throws Exception if SSH credentials are required but missing, or the cluster does not exist or is not
 *         in running state
 */
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
    Map<String, String> props = createContextProperties(context);
    DataprocConf dataprocConf = DataprocConf.create(props);

    if (context.getRuntimeMonitorType() == RuntimeMonitorType.SSH) {
        String user = props.get(SSH_USER);
        String privateKey = props.get(SSH_KEY);
        boolean userGiven = !Strings.isNullOrEmpty(user);
        boolean keyGiven = !Strings.isNullOrEmpty(privateKey);
        if (!(userGiven && keyGiven)) {
            throw new DataprocRuntimeException("SSH User and key are required for monitoring through SSH.");
        }
        SSHKeyPair keyPair = new SSHKeyPair(
            new SSHPublicKey(user, ""),
            () -> privateKey.getBytes(StandardCharsets.UTF_8));
        // A null SSH context is not expected, but guard against a platform bug.
        Optional.ofNullable(context.getSSHContext()).ifPresent(ssh -> ssh.setSSHKeyPair(keyPair));
    }

    String existingClusterName = props.get(CLUSTER_NAME);
    try (DataprocClient dataprocClient = DataprocClient.fromConf(dataprocConf, false)) {
        try {
            dataprocClient.updateClusterLabels(existingClusterName, getSystemLabels());
        } catch (DataprocRuntimeException labelError) {
            // The stack trace is only included when trace logging is enabled.
            if (!LOG.isTraceEnabled()) {
                LOG.debug("Cannot update cluster labels due to {}", labelError.getMessage());
            } else {
                LOG.trace("Cannot update cluster labels due to {}", labelError.getMessage(), labelError);
            }
        }
        return dataprocClient.getCluster(existingClusterName)
            .filter(found -> found.getStatus() == ClusterStatus.RUNNING)
            .orElseThrow(() -> new DataprocRuntimeException(
                "Dataproc cluster " + existingClusterName + " does not exist or not in running state."));
    }
}
Also used : RuntimeMonitorType(io.cdap.cdap.runtime.spi.RuntimeMonitorType) PollingStrategies(io.cdap.cdap.runtime.spi.provisioner.PollingStrategies) Logger(org.slf4j.Logger) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) ProvisionerSpecification(io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification) LoggerFactory(org.slf4j.LoggerFactory) ClusterStatus(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus) StandardCharsets(java.nio.charset.StandardCharsets) TimeUnit(java.util.concurrent.TimeUnit) Strings(com.google.common.base.Strings) PollingStrategy(io.cdap.cdap.runtime.spi.provisioner.PollingStrategy) Map(java.util.Map) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey) Optional(java.util.Optional) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey)

Example 5 with ProvisionerContext

use of io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext in project cdap by caskdata.

the class ProvisioningService method getClusterStatus.

/**
 * Queries the provisioner for the {@link ClusterStatus} of the cluster executing the given program run.
 *
 * <p>{@link ClusterStatus#NOT_EXISTS} is returned when no provisioner is registered under the profile's
 * provisioner name, or when the provisioner context cannot be created because of an
 * {@link InvalidMacroException}. The status query itself is retried on
 * {@link RetryableProvisionException}.
 *
 * @param programRunId the program run id for checking the cluster status
 * @param programOptions the program options for the given run
 * @param cluster the {@link Cluster} information for the given run
 * @param userId the user id to use for {@link SecureStore} operation.
 * @return the {@link ClusterStatus}
 * @throws Exception if non-retryable exception is encountered when querying cluster status
 */
public ClusterStatus getClusterStatus(ProgramRunId programRunId, ProgramOptions programOptions, Cluster cluster, String userId) throws Exception {
    Map<String, String> systemArguments = programOptions.getArguments().asMap();
    String provisionerName = SystemArguments.getProfileProvisioner(systemArguments);
    Provisioner profileProvisioner = provisionerInfo.get().provisioners.get(provisionerName);
    if (profileProvisioner == null) {
        // Without a provisioner nothing further can be checked.
        return ClusterStatus.NOT_EXISTS;
    }

    Map<String, String> profileProperties = SystemArguments.getProfileProperties(systemArguments);
    ProvisionerContext statusContext;
    try {
        // An SSH context is only built when the provisioner offers no runtime job manager.
        DefaultSSHContext sshContext = getRuntimeJobManager(programRunId, programOptions).isPresent()
            ? null
            : new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS), null, null);
        statusContext = createContext(cConf, programOptions, programRunId, userId, profileProperties, sshContext);
    } catch (InvalidMacroException macroError) {
        // Not expected to happen at this stage.
        runWithProgramLogging(programRunId, systemArguments,
            () -> LOG.error("Could not evaluate macros while checking cluster status.", macroError));
        return ClusterStatus.NOT_EXISTS;
    }

    return Retries.callWithRetries(
        () -> profileProvisioner.getClusterStatus(statusContext, cluster),
        RetryStrategies.exponentialDelay(1, 5, TimeUnit.SECONDS),
        RetryableProvisionException.class::isInstance);
}
Also used : InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) Provisioner(io.cdap.cdap.runtime.spi.provisioner.Provisioner)

Aggregations

ProvisionerContext (io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext)6 RetryableProvisionException (io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException)4 InvalidMacroException (io.cdap.cdap.api.macro.InvalidMacroException)3 SSHContext (io.cdap.cdap.runtime.spi.ssh.SSHContext)3 SSHKeyPair (io.cdap.cdap.runtime.spi.ssh.SSHKeyPair)3 IOException (java.io.IOException)3 NotFoundException (io.cdap.cdap.common.NotFoundException)2 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)2 RuntimeMonitorType (io.cdap.cdap.runtime.spi.RuntimeMonitorType)2 Cluster (io.cdap.cdap.runtime.spi.provisioner.Cluster)2 ClusterStatus (io.cdap.cdap.runtime.spi.provisioner.ClusterStatus)2 PollingStrategies (io.cdap.cdap.runtime.spi.provisioner.PollingStrategies)2 PollingStrategy (io.cdap.cdap.runtime.spi.provisioner.PollingStrategy)2 Provisioner (io.cdap.cdap.runtime.spi.provisioner.Provisioner)2 ProvisionerSpecification (io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification)2 ConnectException (java.net.ConnectException)2 SocketTimeoutException (java.net.SocketTimeoutException)2 Map (java.util.Map)2 Optional (java.util.Optional)2 TimeUnit (java.util.concurrent.TimeUnit)2