Search in sources :

Example 1 with SSHKeyPair

use of io.cdap.cdap.runtime.spi.ssh.SSHKeyPair in project cdap by caskdata.

the class DataprocProvisioner method createCluster.

/**
 * Creates a Dataproc cluster for the given provisioner context, or returns one that can be used instead.
 *
 * <p>The method first makes sure an SSH key pair is available when SSH monitoring is used or the runtime
 * job manager is disabled, then tries to reuse a cluster, then checks whether a cluster with the run's
 * name already exists (a retry), and only after that submits a create request.
 *
 * @param context the provisioner context describing the program run
 * @return a reused cluster, an already existing cluster, or a new {@link Cluster} in
 *         {@link ClusterStatus#CREATING} state with empty node list and properties
 * @throws Exception any failure is rethrown after a failure metric has been emitted
 */
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
    DataprocConf conf = DataprocConf.create(createContextProperties(context));
    boolean autoscalingFieldsValid = isAutoscalingFieldsValid(conf, createContextProperties(context));
    if (!autoscalingFieldsValid) {
        LOG.warn("The configs : {}, {}, {} will not be considered when {} is enabled ", DataprocConf.WORKER_NUM_NODES, DataprocConf.SECONDARY_WORKER_NUM_NODES, DataprocConf.AUTOSCALING_POLICY, DataprocConf.PREDEFINED_AUTOSCALE_ENABLED);
    }

    // An SSH key is only relevant for SSH monitoring or when the runtime job manager is not used.
    boolean sshKeyRelevant = context.getRuntimeMonitorType() == RuntimeMonitorType.SSH || !conf.isRuntimeJobManagerEnabled();
    SSHContext sshContext = sshKeyRelevant ? context.getSSHContext() : null;
    if (sshContext != null) {
        // This method may be invoked again on a retry, so a key pair that is already stored is kept
        // rather than regenerated.
        SSHKeyPair keyPair = sshContext.getSSHKeyPair().orElse(null);
        if (keyPair == null) {
            keyPair = sshContext.generate("cdap");
            sshContext.setSSHKeyPair(keyPair);
        }
        // Rebuild the configuration so that it carries the public key.
        conf = DataprocConf.create(createContextProperties(context), keyPair.getPublicKey());
    }

    try (DataprocClient client = getClient(conf)) {
        Cluster reusedCluster = tryReuseCluster(client, context, conf);
        if (reusedCluster != null) {
            DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.reuse.count");
            return reusedCluster;
        }

        String clusterName = getRunKey(context);
        // A cluster that already carries this name means this call is a retry; no new request is needed.
        Optional<Cluster> alreadyCreated = client.getCluster(clusterName);
        if (alreadyCreated.isPresent()) {
            return alreadyCreated.get();
        }

        String imageVersion = getImageVersion(context, conf);
        // For logging, prefer the custom image URI and fall back to the image version.
        String customImageUri = conf.getCustomImageUri();
        boolean hasCustomImage = customImageUri != null && !customImageUri.isEmpty();
        String imageDescription = hasCustomImage ? customImageUri : imageVersion;

        // Later entries override earlier ones: system labels, then reuse labels, then cluster labels.
        Map<String, String> labels = new HashMap<>(getSystemLabels());
        labels.putAll(getReuseLabels(context, conf));
        labels.putAll(conf.getClusterLabels());

        LOG.info("Creating Dataproc cluster {} in project {}, in region {}, with image {}, with labels {}, endpoint {}", clusterName, conf.getProjectId(), conf.getRegion(), imageDescription, labels, getRootUrl(conf));

        String privateInstanceValue = getSystemContext().getProperties().get(PRIVATE_INSTANCE);
        boolean privateInstance = Boolean.parseBoolean(privateInstanceValue);
        ClusterOperationMetadata operationMetadata = client.createCluster(clusterName, imageVersion, labels, privateInstance);

        int warningCount = operationMetadata.getWarningsCount();
        if (warningCount > 0) {
            String pluralSuffix = warningCount > 1 ? "s" : "";
            LOG.warn("Encountered {} warning{} while creating Dataproc cluster:\n{}", warningCount, pluralSuffix, String.join("\n", operationMetadata.getWarningsList()));
        }

        DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count");
        return new Cluster(clusterName, ClusterStatus.CREATING, Collections.emptyList(), Collections.emptyMap());
    } catch (Exception e) {
        // Record the failure against the same metric before propagating it unchanged.
        DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count", e);
        throw e;
    }
}
Also used : SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) ClusterOperationMetadata(com.google.cloud.dataproc.v1.ClusterOperationMetadata) HashMap(java.util.HashMap) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) GeneralSecurityException(java.security.GeneralSecurityException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException)

Example 2 with SSHKeyPair

use of io.cdap.cdap.runtime.spi.ssh.SSHKeyPair in project cdap by caskdata.

the class ExistingDataprocProvisioner method createCluster.

/**
 * Looks up an existing Dataproc cluster by the configured name and returns it if it is running.
 *
 * <p>When SSH monitoring is used, the SSH user and key from the context properties are turned into a
 * key pair and stored on the SSH context. Updating the cluster labels is best effort: a
 * {@link DataprocRuntimeException} from that call is logged and otherwise ignored.
 *
 * @param context the provisioner context describing the program run
 * @return the cluster with the configured name, which is in {@link ClusterStatus#RUNNING} state
 * @throws Exception if SSH monitoring is requested without user and key, or if the cluster does not
 *         exist or is not running
 */
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
    Map<String, String> props = createContextProperties(context);
    DataprocConf conf = DataprocConf.create(props);

    boolean sshMonitoring = context.getRuntimeMonitorType() == RuntimeMonitorType.SSH;
    if (sshMonitoring) {
        String user = props.get(SSH_USER);
        String privateKey = props.get(SSH_KEY);
        boolean credentialsMissing = Strings.isNullOrEmpty(user) || Strings.isNullOrEmpty(privateKey);
        if (credentialsMissing) {
            throw new DataprocRuntimeException("SSH User and key are required for monitoring through SSH.");
        }
        // The public key text is left empty; only the user and the private key bytes are supplied.
        SSHPublicKey publicKey = new SSHPublicKey(user, "");
        SSHKeyPair keyPair = new SSHKeyPair(publicKey, () -> privateKey.getBytes(StandardCharsets.UTF_8));
        // A null SSH context is not expected here, but guard against a platform bug anyway.
        Optional.ofNullable(context.getSSHContext()).ifPresent(ssh -> ssh.setSSHKeyPair(keyPair));
    }

    String clusterName = props.get(CLUSTER_NAME);
    try (DataprocClient client = DataprocClient.fromConf(conf, false)) {
        try {
            client.updateClusterLabels(clusterName, getSystemLabels());
        } catch (DataprocRuntimeException e) {
            // The stack trace is only worth logging when trace level is enabled.
            if (!LOG.isTraceEnabled()) {
                LOG.debug("Cannot update cluster labels due to {}", e.getMessage());
            } else {
                LOG.trace("Cannot update cluster labels due to {}", e.getMessage(), e);
            }
        }

        Optional<Cluster> found = client.getCluster(clusterName);
        if (found.isPresent() && found.get().getStatus() == ClusterStatus.RUNNING) {
            return found.get();
        }
        throw new DataprocRuntimeException("Dataproc cluster " + clusterName + " does not exist or not in running state.");
    }
}
Also used : RuntimeMonitorType(io.cdap.cdap.runtime.spi.RuntimeMonitorType) PollingStrategies(io.cdap.cdap.runtime.spi.provisioner.PollingStrategies) Logger(org.slf4j.Logger) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) ProvisionerSpecification(io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification) LoggerFactory(org.slf4j.LoggerFactory) ClusterStatus(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus) StandardCharsets(java.nio.charset.StandardCharsets) TimeUnit(java.util.concurrent.TimeUnit) Strings(com.google.common.base.Strings) PollingStrategy(io.cdap.cdap.runtime.spi.provisioner.PollingStrategy) Map(java.util.Map) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey) Optional(java.util.Optional) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey)

Example 3 with SSHKeyPair

use of io.cdap.cdap.runtime.spi.ssh.SSHKeyPair in project cdap by caskdata.

the class ElasticMapReduceProvisioner method createCluster.

/**
 * Creates an EMR cluster for the given provisioner context, or returns the one that already exists.
 *
 * <p>An SSH key pair is made available on the context's SSH context before the EMR configuration is
 * built. A key pair that is already stored there is reused, so that a retried invocation does not
 * replace a key pair set by an earlier attempt (presumably the one an already existing cluster was
 * created with -- TODO confirm against {@code EMRConf}).
 *
 * @param context the provisioner context describing the program run
 * @return the existing unterminated cluster with the run's cluster name, or a new {@link Cluster} in
 *         {@link ClusterStatus#CREATING} state with empty node list and properties
 * @throws java.util.NoSuchElementException if an unterminated cluster was listed by name but could no
 *         longer be fetched by id
 * @throws Exception on any other failure while talking to EMR
 */
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
    // Generate and set the ssh key only if the context does not have one yet; this method can be
    // invoked again on a retry and must not keep regenerating the keys.
    SSHKeyPair sshKeyPair = context.getSSHContext().getSSHKeyPair().orElse(null);
    if (sshKeyPair == null) {
        // or 'hadoop'
        sshKeyPair = context.getSSHContext().generate("ec2-user");
        context.getSSHContext().setSSHKeyPair(sshKeyPair);
    }
    EMRConf conf = EMRConf.fromProvisionerContext(context);
    String clusterName = getClusterName(context.getProgramRunInfo());
    try (EMRClient client = EMRClient.fromConf(conf)) {
        // if it already exists, it means this is a retry. We can skip actually making the request
        Optional<ClusterSummary> existing = client.getUnterminatedClusterByName(clusterName);
        if (existing.isPresent()) {
            String existingId = existing.get().getId();
            // Same exception type as an unchecked Optional.get(), but with a message that names the cluster.
            return client.getCluster(existingId).orElseThrow(() -> new java.util.NoSuchElementException("EMR cluster " + clusterName + " with id " + existingId + " was listed but could not be fetched."));
        }
        String clusterId = client.createCluster(clusterName);
        return new Cluster(clusterId, ClusterStatus.CREATING, Collections.emptyList(), Collections.emptyMap());
    }
}
Also used : SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) ClusterSummary(com.amazonaws.services.elasticmapreduce.model.ClusterSummary) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster)

Example 4 with SSHKeyPair

use of io.cdap.cdap.runtime.spi.ssh.SSHKeyPair in project cdap by caskdata.

the class RemoteHadoopConf method fromProperties.

/**
 * Builds a {@link RemoteHadoopConf} from a property map.
 *
 * <p>The {@code host}, {@code user} and {@code sshKey} entries are read through {@code getString}
 * (which presumably performs the validation -- it is not shown here); the remaining entries are read
 * directly from the map and may be {@code null}.
 *
 * @param properties the provisioner properties
 * @return the configuration built from the given properties
 */
public static RemoteHadoopConf fromProperties(Map<String, String> properties) {
    String hostName = getString(properties, "host");
    String sshUser = getString(properties, "user");
    String sshPrivateKey = getString(properties, "sshKey");

    // The public key text is left empty; only the user and the private key bytes are supplied.
    SSHPublicKey publicKey = new SSHPublicKey(sshUser, "");
    SSHKeyPair sshKeyPair = new SSHKeyPair(publicKey, () -> sshPrivateKey.getBytes(StandardCharsets.UTF_8));

    String initializationAction = properties.get("initializationAction");
    String kerberosPrincipal = properties.get("kerberosPrincipal");
    String kerberosKeytabPath = properties.get("kerberosKeytabPath");
    return new RemoteHadoopConf(sshKeyPair, hostName, initializationAction, kerberosPrincipal, kerberosKeytabPath);
}
Also used : SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey)

Example 5 with SSHKeyPair

use of io.cdap.cdap.runtime.spi.ssh.SSHKeyPair in project cdap by caskdata.

the class ProvisioningService method createDeprovisionTask.

/**
 * Builds a {@link Runnable} that, when run, submits a deprovision task for the given task info to the
 * task executor.
 *
 * <p>If the SSH key cannot be loaded, a warning is logged and the task is built without a key pair.
 * If the provisioner context cannot be created because of an {@link InvalidMacroException}, the error
 * is logged, the program run is reported as orphaned, and a no-op {@link Runnable} is returned.
 *
 * @param taskInfo information about the provisioning task to deprovision
 * @param provisioner the provisioner handed to the {@link DeprovisionTask}
 * @param taskCleanup invoked with the program run id when the task reports a negative delay or fails
 *                    with an exception other than {@link InterruptedException}
 * @return a runnable that submits the deprovision task, or a no-op runnable if the context could not
 *         be created
 */
private Runnable createDeprovisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner, Consumer<ProgramRunId> taskCleanup) {
    Map<String, String> properties = taskInfo.getProvisionerProperties();
    ProvisionerContext context;
    // Stays null if the key cannot be loaded; deprovisioning is still attempted without it.
    SSHKeyPair sshKeyPair = null;
    try {
        sshKeyPair = createSSHKeyPair(taskInfo);
    } catch (IOException e) {
        LOG.warn("Failed to load ssh key. No SSH key will be available for the deprovision task", e);
    }
    ProgramRunId programRunId = taskInfo.getProgramRunId();
    Map<String, String> systemArgs = taskInfo.getProgramOptions().getArguments().asMap();
    try {
        SSHContext sshContext = new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS), null, sshKeyPair);
        context = createContext(cConf, taskInfo.getProgramOptions(), programRunId, taskInfo.getUser(), properties, sshContext);
    } catch (InvalidMacroException e) {
        // Without a context no deprovision task can be built: report the run as orphaned and hand
        // back a runnable that does nothing.
        runWithProgramLogging(programRunId, systemArgs, () -> LOG.error("Could not evaluate macros while deprovisoning. " + "The cluster will be marked as orphaned.", e));
        provisionerNotifier.orphaned(programRunId);
        return () -> {
        };
    }
    // NOTE(review): the meaning and unit of the literal 300 are not visible here -- confirm against
    // the DeprovisionTask constructor.
    DeprovisionTask task = new DeprovisionTask(taskInfo, transactionRunner, 300, provisioner, context, provisionerNotifier, locationFactory);
    ProvisioningTaskKey taskKey = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.DEPROVISION);
    return () -> taskExecutor.submit(taskKey, () -> callWithProgramLogging(programRunId, systemArgs, () -> {
        try {
            long delay = task.executeOnce();
            // A negative delay triggers the cleanup; presumably it signals that the task is finished
            // and will not be executed again -- confirm against DeprovisionTask.executeOnce.
            if (delay < 0) {
                taskCleanup.accept(programRunId);
            }
            return delay;
        } catch (InterruptedException e) {
            // We can get interrupted if the task is cancelled or CDAP is stopped. In either case, just return.
            // If it was cancelled, state cleanup is left to the caller. If it was CDAP master stopping, the task
            // will be resumed on master startup
            LOG.debug("Deprovision task for program run {} interrupted.", programRunId);
            throw e;
        } catch (Exception e) {
            // Otherwise, if there was an error deprovisioning, run the cleanup
            LOG.info("Deprovision task for program run {} failed.", programRunId, e);
            taskCleanup.accept(programRunId);
            throw e;
        }
    }));
}
Also used : SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) IOException(java.io.IOException) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) NotFoundException(io.cdap.cdap.common.NotFoundException) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) DeprovisionTask(io.cdap.cdap.internal.provision.task.DeprovisionTask) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId)

Aggregations

SSHKeyPair (io.cdap.cdap.runtime.spi.ssh.SSHKeyPair)7 Cluster (io.cdap.cdap.runtime.spi.provisioner.Cluster)4 ProvisionerContext (io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext)3 RetryableProvisionException (io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException)3 SSHContext (io.cdap.cdap.runtime.spi.ssh.SSHContext)3 SSHPublicKey (io.cdap.cdap.runtime.spi.ssh.SSHPublicKey)3 IOException (java.io.IOException)3 InvalidMacroException (io.cdap.cdap.api.macro.InvalidMacroException)2 NotFoundException (io.cdap.cdap.common.NotFoundException)2 DeprovisionTask (io.cdap.cdap.internal.provision.task.DeprovisionTask)2 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)2 RuntimeMonitorType (io.cdap.cdap.runtime.spi.RuntimeMonitorType)2 ClusterStatus (io.cdap.cdap.runtime.spi.provisioner.ClusterStatus)2 ProvisionerSpecification (io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification)2 ConnectException (java.net.ConnectException)2 SocketTimeoutException (java.net.SocketTimeoutException)2 Map (java.util.Map)2 Optional (java.util.Optional)2 TimeUnit (java.util.concurrent.TimeUnit)2 Logger (org.slf4j.Logger)2