Use of com.google.cloud.dataproc.v1.Cluster in project cdap by caskdata.
In the class DataprocClient, the method getClusterStatus:
/**
 * Get the status of the specified cluster.
 *
 * @param name the cluster name
 * @return the cluster status
 * @throws RetryableProvisionException if there was a non 4xx error code returned
 */
io.cdap.cdap.runtime.spi.provisioner.ClusterStatus getClusterStatus(String name) throws RetryableProvisionException {
  io.cdap.cdap.runtime.spi.provisioner.ClusterStatus status = getDataprocCluster(name)
      .map(cluster -> convertStatus(cluster.getStatus()))
      .orElse(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus.NOT_EXISTS);
  // if it failed, try to get the create operation and log the error message
  try {
    if (status == io.cdap.cdap.runtime.spi.provisioner.ClusterStatus.FAILED) {
      String resourceName = String.format("projects/%s/regions/%s/operations", conf.getProjectId(), conf.getRegion());
      String filter = String.format("clusterName=%s AND operationType=CREATE", name);
      OperationsClient.ListOperationsPagedResponse operationsResponse =
          client.getOperationsClient().listOperations(resourceName, filter);
      OperationsClient.ListOperationsPage page = operationsResponse.getPage();
      if (page == null) {
        LOG.warn("Unable to get the cause of the cluster creation failure.");
        return status;
      }
      if (page.getPageElementCount() > 1) {
        // shouldn't be possible
        LOG.warn("Multiple create operations found for cluster {}, may not be able to find the failure message.", name);
      }
      if (page.getPageElementCount() > 0) {
        Operation operation = page.getValues().iterator().next();
        Status operationError = operation.getError();
        if (operationError != null) {
          LOG.warn("Failed to create cluster {}: {}", name, operationError.getMessage());
        }
      }
    }
  } catch (Exception e) {
    // if we failed to get the operations list, log a warning and proceed with normal execution
    LOG.warn("Unable to get the cause of the cluster creation failure.", e);
  }
  return status;
}
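For orientation, here is a minimal sketch (not taken from the CDAP source) of how a caller in the same package might poll getClusterStatus until provisioning settles; the helper name waitForClusterStatus and the polling interval are assumptions.
// Illustrative sketch only: poll the status returned by getClusterStatus until the
// cluster leaves the CREATING state. The 10-second interval is an arbitrary assumption.
io.cdap.cdap.runtime.spi.provisioner.ClusterStatus waitForClusterStatus(DataprocClient client, String name)
    throws Exception {
  io.cdap.cdap.runtime.spi.provisioner.ClusterStatus status = client.getClusterStatus(name);
  while (status == io.cdap.cdap.runtime.spi.provisioner.ClusterStatus.CREATING) {
    java.util.concurrent.TimeUnit.SECONDS.sleep(10);
    status = client.getClusterStatus(name);
  }
  return status;
}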
Use of com.google.cloud.dataproc.v1.Cluster in project cdap by caskdata.
In the class DataprocProvisioner, the method createCluster:
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
  DataprocConf conf = DataprocConf.create(createContextProperties(context));
  if (!isAutoscalingFieldsValid(conf, createContextProperties(context))) {
    LOG.warn("The configs : {}, {}, {} will not be considered when {} is enabled ",
             DataprocConf.WORKER_NUM_NODES, DataprocConf.SECONDARY_WORKER_NUM_NODES,
             DataprocConf.AUTOSCALING_POLICY, DataprocConf.PREDEFINED_AUTOSCALE_ENABLED);
  }
  if (context.getRuntimeMonitorType() == RuntimeMonitorType.SSH || !conf.isRuntimeJobManagerEnabled()) {
    // Generate and set the SSH key if one does not already exist.
    // Since this method can be invoked from a retry, we don't need to keep regenerating the keys.
    SSHContext sshContext = context.getSSHContext();
    if (sshContext != null) {
      SSHKeyPair sshKeyPair = sshContext.getSSHKeyPair().orElse(null);
      if (sshKeyPair == null) {
        sshKeyPair = sshContext.generate("cdap");
        sshContext.setSSHKeyPair(sshKeyPair);
      }
      conf = DataprocConf.create(createContextProperties(context), sshKeyPair.getPublicKey());
    }
  }
  try (DataprocClient client = getClient(conf)) {
    Cluster reused = tryReuseCluster(client, context, conf);
    if (reused != null) {
      DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.reuse.count");
      return reused;
    }
    String clusterName = getRunKey(context);
    // If the cluster already exists, this is a retry; we can skip actually making the request.
    Optional<Cluster> existing = client.getCluster(clusterName);
    if (existing.isPresent()) {
      return existing.get();
    }
    String imageVersion = getImageVersion(context, conf);
    String imageDescription = conf.getCustomImageUri();
    if (imageDescription == null || imageDescription.isEmpty()) {
      imageDescription = imageVersion;
    }
    // Reload system context properties and get system labels.
    Map<String, String> labels = new HashMap<>();
    labels.putAll(getSystemLabels());
    labels.putAll(getReuseLabels(context, conf));
    labels.putAll(conf.getClusterLabels());
    LOG.info("Creating Dataproc cluster {} in project {}, in region {}, with image {}, with labels {}, endpoint {}",
             clusterName, conf.getProjectId(), conf.getRegion(), imageDescription, labels, getRootUrl(conf));
    boolean privateInstance = Boolean.parseBoolean(getSystemContext().getProperties().get(PRIVATE_INSTANCE));
    ClusterOperationMetadata createOperationMeta = client.createCluster(clusterName, imageVersion, labels, privateInstance);
    int numWarnings = createOperationMeta.getWarningsCount();
    if (numWarnings > 0) {
      LOG.warn("Encountered {} warning{} while creating Dataproc cluster:\n{}",
               numWarnings, numWarnings > 1 ? "s" : "", String.join("\n", createOperationMeta.getWarningsList()));
    }
    DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count");
    return new Cluster(clusterName, ClusterStatus.CREATING, Collections.emptyList(), Collections.emptyMap());
  } catch (Exception e) {
    DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count", e);
    throw e;
  }
}
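The retry safety of this method relies on getRunKey(context) producing the same cluster name every time it is called for the same program run, so a retried createCluster finds the existing cluster instead of creating a duplicate. A hedged illustration of that idea follows; the naming scheme is an assumption for illustration only, not the actual CDAP implementation.
// Illustrative only: derive a deterministic, Dataproc-legal cluster name from a run id.
// The "cdap-" prefix, the character filtering, and the length cap are assumptions.
static String runKey(String runId) {
  String name = "cdap-" + runId.toLowerCase().replaceAll("[^a-z0-9-]", "-");
  // Dataproc cluster names must be lowercase with a bounded length; 51 is an assumed cap.
  return name.length() <= 51 ? name : name.substring(0, 51);
}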
Use of com.google.cloud.dataproc.v1.Cluster in project java-dataproc by googleapis.
In the class InstantiateInlineWorkflowTemplate, the method instantiateInlineWorkflowTemplate:
public static void instantiateInlineWorkflowTemplate(String projectId, String region)
    throws IOException, InterruptedException {
  String myEndpoint = String.format("%s-dataproc.googleapis.com:443", region);
  // Configure the settings for the workflow template service client.
  WorkflowTemplateServiceSettings workflowTemplateServiceSettings =
      WorkflowTemplateServiceSettings.newBuilder().setEndpoint(myEndpoint).build();
  // Using a try-with-resources block here closes the client, but this can also be done
  // manually with the .close() method.
  try (WorkflowTemplateServiceClient workflowTemplateServiceClient =
      WorkflowTemplateServiceClient.create(workflowTemplateServiceSettings)) {
    // Configure the jobs within the workflow.
    HadoopJob teragenHadoopJob = HadoopJob.newBuilder()
        .setMainJarFileUri("file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar")
        .addArgs("teragen")
        .addArgs("1000")
        .addArgs("hdfs:///gen/")
        .build();
    OrderedJob teragen = OrderedJob.newBuilder().setHadoopJob(teragenHadoopJob).setStepId("teragen").build();
    HadoopJob terasortHadoopJob = HadoopJob.newBuilder()
        .setMainJarFileUri("file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar")
        .addArgs("terasort")
        .addArgs("hdfs:///gen/")
        .addArgs("hdfs:///sort/")
        .build();
    OrderedJob terasort = OrderedJob.newBuilder()
        .setHadoopJob(terasortHadoopJob)
        .addPrerequisiteStepIds("teragen")
        .setStepId("terasort")
        .build();
    // Configure the cluster placement for the workflow.
    // Leave "ZoneUri" empty for "Auto Zone Placement":
    // GceClusterConfig gceClusterConfig = GceClusterConfig.newBuilder().setZoneUri("").build();
    GceClusterConfig gceClusterConfig = GceClusterConfig.newBuilder().setZoneUri("us-central1-a").build();
    ClusterConfig clusterConfig = ClusterConfig.newBuilder().setGceClusterConfig(gceClusterConfig).build();
    ManagedCluster managedCluster =
        ManagedCluster.newBuilder().setClusterName("my-managed-cluster").setConfig(clusterConfig).build();
    WorkflowTemplatePlacement workflowTemplatePlacement =
        WorkflowTemplatePlacement.newBuilder().setManagedCluster(managedCluster).build();
    // Create the inline workflow template.
    WorkflowTemplate workflowTemplate = WorkflowTemplate.newBuilder()
        .addJobs(teragen)
        .addJobs(terasort)
        .setPlacement(workflowTemplatePlacement)
        .build();
    // Submit the instantiated inline workflow template request.
    String parent = RegionName.format(projectId, region);
    OperationFuture<Empty, WorkflowMetadata> instantiateInlineWorkflowTemplateAsync =
        workflowTemplateServiceClient.instantiateInlineWorkflowTemplateAsync(parent, workflowTemplate);
    instantiateInlineWorkflowTemplateAsync.get();
    // Print out a success message.
    System.out.println("Workflow ran successfully.");
  } catch (ExecutionException e) {
    System.err.println(String.format("Error running workflow: %s", e.getMessage()));
  }
}
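A possible way to invoke this sample from a main method; the project id and region below are placeholder values, not part of the original sample.
public static void main(String[] args) throws IOException, InterruptedException {
  // Placeholder values for illustration; use a project and region you control.
  String projectId = "my-project-id";
  String region = "us-central1";
  instantiateInlineWorkflowTemplate(projectId, region);
}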
Use of com.google.cloud.dataproc.v1.Cluster in project java-dataproc by googleapis.
In the class SubmitJob, the method submitJob:
public static void submitJob(String projectId, String region, String clusterName)
    throws IOException, InterruptedException {
  String myEndpoint = String.format("%s-dataproc.googleapis.com:443", region);
  // Configure the settings for the job controller client.
  JobControllerSettings jobControllerSettings =
      JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();
  // Using a try-with-resources block here closes the client, but this can also be done
  // manually with the .close() method.
  try (JobControllerClient jobControllerClient = JobControllerClient.create(jobControllerSettings)) {
    // Configure cluster placement for the job.
    JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
    // Configure Spark job settings.
    SparkJob sparkJob = SparkJob.newBuilder()
        .setMainClass("org.apache.spark.examples.SparkPi")
        .addJarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar")
        .addArgs("1000")
        .build();
    Job job = Job.newBuilder().setPlacement(jobPlacement).setSparkJob(sparkJob).build();
    // Submit an asynchronous request to execute the job.
    OperationFuture<Job, JobMetadata> submitJobAsOperationAsyncRequest =
        jobControllerClient.submitJobAsOperationAsync(projectId, region, job);
    Job response = submitJobAsOperationAsyncRequest.get();
    // Fetch the driver output from Google Cloud Storage and print it.
    // The matches() call must run before group() so the capturing groups are populated.
    Matcher matches = Pattern.compile("gs://(.*?)/(.*)").matcher(response.getDriverOutputResourceUri());
    matches.matches();
    Storage storage = StorageOptions.getDefaultInstance().getService();
    Blob blob = storage.get(matches.group(1), String.format("%s.000000000", matches.group(2)));
    System.out.println(String.format("Job finished successfully: %s", new String(blob.getContent())));
  } catch (ExecutionException e) {
    // If the job does not complete successfully, print the error message.
    System.err.println(String.format("submitJob: %s", e.getMessage()));
  }
}
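A possible way to invoke this sample from a main method; the project id, region, and cluster name below are placeholder values, and the cluster must already exist in the given region.
public static void main(String[] args) throws IOException, InterruptedException {
  // Placeholder values for illustration only.
  String projectId = "my-project-id";
  String region = "us-central1";
  String clusterName = "my-cluster";
  submitJob(projectId, region, clusterName);
}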
Use of com.google.cloud.dataproc.v1.Cluster in project java-dataproc by googleapis.
In the class QuickstartTest, the method teardown:
@After
public void teardown() throws IOException, InterruptedException, ExecutionException {
  blob.delete();
  bucket.delete();
  ClusterControllerSettings clusterControllerSettings =
      ClusterControllerSettings.newBuilder().setEndpoint(ENDPOINT).build();
  try (ClusterControllerClient clusterControllerClient =
      ClusterControllerClient.create(clusterControllerSettings)) {
    for (Cluster element : clusterControllerClient.listClusters(PROJECT_ID, REGION).iterateAll()) {
      // Compare cluster names with equals(); == would compare object references, not the strings.
      if (element.getClusterName().equals(CLUSTER_NAME)) {
        OperationFuture<Empty, ClusterOperationMetadata> deleteClusterAsyncRequest =
            clusterControllerClient.deleteClusterAsync(PROJECT_ID, REGION, CLUSTER_NAME);
        deleteClusterAsyncRequest.get();
        break;
      }
    }
  }
}
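The constants and fields referenced above (ENDPOINT, PROJECT_ID, REGION, CLUSTER_NAME, bucket, blob) are declared on the test class; a hedged sketch of plausible declarations follows, with placeholder values that are not taken from the actual test.
// Illustrative field declarations only; the real test populates these in its setup method.
private static final String PROJECT_ID = "my-project-id";                        // placeholder
private static final String REGION = "us-central1";                              // placeholder
private static final String ENDPOINT = REGION + "-dataproc.googleapis.com:443";
private static final String CLUSTER_NAME = "quickstart-cluster-test";            // placeholder
private Bucket bucket;   // staging bucket created before the test runs
private Blob blob;       // driver output object written during the test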