use of com.google.cloud.dataproc.v1.SparkJob in project java-dataproc by googleapis.
the class SubmitJob method submitJob.
public static void submitJob(String projectId, String region, String clusterName) throws IOException, InterruptedException {
String myEndpoint = String.format("%s-dataproc.googleapis.com:443", region);
// Configure the settings for the job controller client.
JobControllerSettings jobControllerSettings = JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();
// but this can also be done manually with the .close() method.
try (JobControllerClient jobControllerClient = JobControllerClient.create(jobControllerSettings)) {
// Configure cluster placement for the job.
JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
// Configure Spark job settings.
SparkJob sparkJob = SparkJob.newBuilder().setMainClass("org.apache.spark.examples.SparkPi").addJarFileUris("file:///usr/lib/spark/examples/jars/spark-examples.jar").addArgs("1000").build();
Job job = Job.newBuilder().setPlacement(jobPlacement).setSparkJob(sparkJob).build();
// Submit an asynchronous request to execute the job.
OperationFuture<Job, JobMetadata> submitJobAsOperationAsyncRequest = jobControllerClient.submitJobAsOperationAsync(projectId, region, job);
Job response = submitJobAsOperationAsyncRequest.get();
// Print output from Google Cloud Storage.
Matcher matches = Pattern.compile("gs://(.*?)/(.*)").matcher(response.getDriverOutputResourceUri());
matches.matches();
Storage storage = StorageOptions.getDefaultInstance().getService();
Blob blob = storage.get(matches.group(1), String.format("%s.000000000", matches.group(2)));
System.out.println(String.format("Job finished successfully: %s", new String(blob.getContent())));
} catch (ExecutionException e) {
// If the job does not complete successfully, print the error message.
System.err.println(String.format("submitJob: %s ", e.getMessage()));
}
}
Aggregations