Use of io.cdap.cdap.runtime.spi.ssh.SSHContext in project cdap by caskdata.
The class ProvisioningService, method createProvisionTask.
private Runnable createProvisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner) {
  ProgramRunId programRunId = taskInfo.getProgramRunId();
  ProgramOptions programOptions = taskInfo.getProgramOptions();
  Map<String, String> systemArgs = programOptions.getArguments().asMap();
  ProvisionerContext context;
  try {
    SSHContext sshContext = new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS),
                                                  locationFactory.create(taskInfo.getSecureKeysDir()),
                                                  createSSHKeyPair(taskInfo));
    context = createContext(cConf, programOptions, programRunId, taskInfo.getUser(),
                            taskInfo.getProvisionerProperties(), sshContext);
  } catch (IOException e) {
    runWithProgramLogging(taskInfo.getProgramRunId(), systemArgs,
                          () -> LOG.error("Failed to load ssh key. The run will be marked as failed.", e));
    programStateWriter.error(programRunId, new IllegalStateException("Failed to load ssh key.", e));
    provisionerNotifier.deprovisioning(taskInfo.getProgramRunId());
    return () -> { };
  } catch (InvalidMacroException e) {
    runWithProgramLogging(taskInfo.getProgramRunId(), systemArgs,
                          () -> LOG.error("Could not evaluate macros while provisioning. "
                                            + "The run will be marked as failed.", e));
    programStateWriter.error(programRunId, new IllegalStateException("Could not evaluate macros while provisioning", e));
    provisionerNotifier.deprovisioning(taskInfo.getProgramRunId());
    return () -> { };
  }
  // TODO: (CDAP-13246) pick up timeout from profile instead of hardcoding
  ProvisioningTask task = new ProvisionTask(taskInfo, transactionRunner, provisioner, context,
                                            provisionerNotifier, programStateWriter, 300);
  ProvisioningTaskKey taskKey = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.PROVISION);
  return () -> taskExecutor.submit(taskKey, () -> callWithProgramLogging(programRunId, systemArgs, () -> {
    try {
      return task.executeOnce();
    } catch (InterruptedException e) {
      LOG.debug("Provision task for program run {} interrupted.", taskInfo.getProgramRunId());
      throw e;
    } catch (Exception e) {
      LOG.info("Provision task for program run {} failed.", taskInfo.getProgramRunId(), e);
      throw e;
    }
  }));
}
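
The SSHContext built here is handed to the provisioner through the ProvisionerContext, so the provisioner never constructs keys itself; it only reads or populates what the context carries. From this wiring and the Dataproc snippet further down, the part of the SPI a provisioner actually touches can be summarized as follows. This is an illustrative sketch inferred from these examples only, with the declared exception being an assumption; it is not the full io.cdap.cdap.runtime.spi.ssh.SSHContext definition.

import java.util.Optional;

import io.cdap.cdap.runtime.spi.ssh.SSHKeyPair;

// Illustrative sketch: only the methods exercised in the snippets on this page.
interface SSHContextSketch {
  // Key pair previously attached to this run, if any (empty on the first attempt).
  Optional<SSHKeyPair> getSSHKeyPair();

  // Stores a generated key pair so that retries reuse it instead of regenerating.
  void setSSHKeyPair(SSHKeyPair keyPair);

  // Generates a fresh key pair for the given SSH user, e.g. "cdap".
  SSHKeyPair generate(String user) throws Exception;
}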
Use of io.cdap.cdap.runtime.spi.ssh.SSHContext in project cdap by caskdata.
The class DataprocProvisioner, method createCluster.
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
  DataprocConf conf = DataprocConf.create(createContextProperties(context));
  if (!isAutoscalingFieldsValid(conf, createContextProperties(context))) {
    LOG.warn("The configs: {}, {}, {} will not be considered when {} is enabled",
             DataprocConf.WORKER_NUM_NODES, DataprocConf.SECONDARY_WORKER_NUM_NODES,
             DataprocConf.AUTOSCALING_POLICY, DataprocConf.PREDEFINED_AUTOSCALE_ENABLED);
  }
  if (context.getRuntimeMonitorType() == RuntimeMonitorType.SSH || !conf.isRuntimeJobManagerEnabled()) {
    // Generate and set the ssh key if it does not have one.
    // Since an invocation of this method can come from a retry, we don't need to keep regenerating the keys.
    SSHContext sshContext = context.getSSHContext();
    if (sshContext != null) {
      SSHKeyPair sshKeyPair = sshContext.getSSHKeyPair().orElse(null);
      if (sshKeyPair == null) {
        sshKeyPair = sshContext.generate("cdap");
        sshContext.setSSHKeyPair(sshKeyPair);
      }
      conf = DataprocConf.create(createContextProperties(context), sshKeyPair.getPublicKey());
    }
  }
  try (DataprocClient client = getClient(conf)) {
    Cluster reused = tryReuseCluster(client, context, conf);
    if (reused != null) {
      DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.reuse.count");
      return reused;
    }
    String clusterName = getRunKey(context);
    // If it already exists, this is a retry and we can skip actually making the request.
    Optional<Cluster> existing = client.getCluster(clusterName);
    if (existing.isPresent()) {
      return existing.get();
    }
    String imageVersion = getImageVersion(context, conf);
    String imageDescription = conf.getCustomImageUri();
    if (imageDescription == null || imageDescription.isEmpty()) {
      imageDescription = imageVersion;
    }
    // Reload system context properties and get system labels
    Map<String, String> labels = new HashMap<>();
    labels.putAll(getSystemLabels());
    labels.putAll(getReuseLabels(context, conf));
    labels.putAll(conf.getClusterLabels());
    LOG.info("Creating Dataproc cluster {} in project {}, in region {}, with image {}, with labels {}, endpoint {}",
             clusterName, conf.getProjectId(), conf.getRegion(), imageDescription, labels, getRootUrl(conf));
    boolean privateInstance = Boolean.parseBoolean(getSystemContext().getProperties().get(PRIVATE_INSTANCE));
    ClusterOperationMetadata createOperationMeta = client.createCluster(clusterName, imageVersion, labels, privateInstance);
    int numWarnings = createOperationMeta.getWarningsCount();
    if (numWarnings > 0) {
      LOG.warn("Encountered {} warning{} while creating Dataproc cluster:\n{}",
               numWarnings, numWarnings > 1 ? "s" : "", String.join("\n", createOperationMeta.getWarningsList()));
    }
    DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count");
    return new Cluster(clusterName, ClusterStatus.CREATING, Collections.emptyList(), Collections.emptyMap());
  } catch (Exception e) {
    DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count", e);
    throw e;
  }
}
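
The key handling above is retry-safe by design: a key pair already attached to the SSHContext is reused, and a freshly generated one is stored back so that a retried createCluster call sees it instead of generating another. Below is a minimal sketch of that pattern factored into a helper; the class and method names are assumptions, and the broad throws clause stands in for whatever checked exception generate declares.

import io.cdap.cdap.runtime.spi.ssh.SSHContext;
import io.cdap.cdap.runtime.spi.ssh.SSHKeyPair;

final class SshKeySetup {
  private SshKeySetup() { }

  // Returns the key pair already on the context (e.g. set by an earlier attempt); otherwise
  // generates one for the given user and stores it back so later retries reuse it.
  static SSHKeyPair ensureKeyPair(SSHContext sshContext, String user) throws Exception {
    SSHKeyPair keyPair = sshContext.getSSHKeyPair().orElse(null);
    if (keyPair == null) {
      keyPair = sshContext.generate(user);
      sshContext.setSSHKeyPair(keyPair);
    }
    return keyPair;
  }
}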
Use of io.cdap.cdap.runtime.spi.ssh.SSHContext in project cdap by caskdata.
The class ClusterCreateSubtask, method execute.
@Override
public Cluster execute(Cluster cluster) throws Exception {
  Cluster nextCluster = provisioner.createCluster(provisionerContext);
  SSHContext sshContext = provisionerContext.getSSHContext();
  // The SSH context can be null if SSH is not being used to submit the job.
  if (sshContext == null) {
    return new Cluster(nextCluster.getName(), nextCluster.getStatus(), nextCluster.getNodes(), nextCluster.getProperties());
  }
  Map<String, String> properties = new HashMap<>(nextCluster.getProperties());
  // Set the SSH user if the provisioner set a key pair.
  sshContext.getSSHKeyPair().ifPresent(
    sshKeyPair -> properties.put(Constants.RuntimeMonitor.SSH_USER, sshKeyPair.getPublicKey().getUser()));
  return new Cluster(nextCluster.getName(), nextCluster.getStatus(), nextCluster.getNodes(), properties);
}
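
The subtask only records the SSH user under Constants.RuntimeMonitor.SSH_USER; whatever later connects to the cluster over SSH can read it back from the cluster properties. A hedged sketch of such a lookup follows; the helper, the key parameter, and the fallback value are illustrative and not CDAP code.

import java.util.Map;

final class SshUserLookup {
  private SshUserLookup() { }

  // Hypothetical downstream read of the property written by ClusterCreateSubtask above.
  // The key would be Constants.RuntimeMonitor.SSH_USER; it is passed in so the sketch stays
  // independent of CDAP's Constants class. "cdap" is only an illustrative fallback.
  static String sshUser(Map<String, String> clusterProperties, String sshUserKey) {
    return clusterProperties.getOrDefault(sshUserKey, "cdap");
  }
}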
Use of io.cdap.cdap.runtime.spi.ssh.SSHContext in project cdap by caskdata.
The class ProvisioningService, method createDeprovisionTask.
private Runnable createDeprovisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner,
                                       Consumer<ProgramRunId> taskCleanup) {
  Map<String, String> properties = taskInfo.getProvisionerProperties();
  ProvisionerContext context;
  SSHKeyPair sshKeyPair = null;
  try {
    sshKeyPair = createSSHKeyPair(taskInfo);
  } catch (IOException e) {
    LOG.warn("Failed to load ssh key. No SSH key will be available for the deprovision task", e);
  }
  ProgramRunId programRunId = taskInfo.getProgramRunId();
  Map<String, String> systemArgs = taskInfo.getProgramOptions().getArguments().asMap();
  try {
    SSHContext sshContext = new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS),
                                                  null, sshKeyPair);
    context = createContext(cConf, taskInfo.getProgramOptions(), programRunId, taskInfo.getUser(), properties, sshContext);
  } catch (InvalidMacroException e) {
    runWithProgramLogging(programRunId, systemArgs,
                          () -> LOG.error("Could not evaluate macros while deprovisioning. "
                                            + "The cluster will be marked as orphaned.", e));
    provisionerNotifier.orphaned(programRunId);
    return () -> { };
  }
  DeprovisionTask task = new DeprovisionTask(taskInfo, transactionRunner, 300, provisioner, context,
                                             provisionerNotifier, locationFactory);
  ProvisioningTaskKey taskKey = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.DEPROVISION);
  return () -> taskExecutor.submit(taskKey, () -> callWithProgramLogging(programRunId, systemArgs, () -> {
    try {
      long delay = task.executeOnce();
      if (delay < 0) {
        taskCleanup.accept(programRunId);
      }
      return delay;
    } catch (InterruptedException e) {
      // We can get interrupted if the task is cancelled or CDAP is stopped. In either case, just return.
      // If it was cancelled, state cleanup is left to the caller. If it was the CDAP master stopping,
      // the task will be resumed on master startup.
      LOG.debug("Deprovision task for program run {} interrupted.", programRunId);
      throw e;
    } catch (Exception e) {
      // Otherwise, if there was an error deprovisioning, run the cleanup.
      LOG.info("Deprovision task for program run {} failed.", programRunId, e);
      taskCleanup.accept(programRunId);
      throw e;
    }
  }));
}
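
As with the provision task, executeOnce returns a delay: the snippet treats a negative value as final (cleanup runs), while a non-negative value implies the task should run again later. Below is a minimal sketch of a caller acting on that contract, assuming the delay is in seconds and using a plain ScheduledExecutorService instead of CDAP's provisioning task executor.

import java.util.concurrent.Callable;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

final class TaskRescheduler {
  private TaskRescheduler() { }

  // Runs the task once; a non-negative result schedules another attempt after that many
  // seconds, while a negative result (or an exception) stops the cycle, mirroring the
  // snippet's cleanup-and-stop behaviour. The time unit and executor choice are assumptions.
  static void submitWithRetry(ScheduledExecutorService executor, Callable<Long> task) {
    executor.execute(() -> {
      try {
        long delay = task.call();
        if (delay >= 0) {
          executor.schedule(() -> submitWithRetry(executor, task), delay, TimeUnit.SECONDS);
        }
      } catch (Exception e) {
        // The snippet rethrows after cleanup; here we simply stop rescheduling.
      }
    });
  }
}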