use of com.google.cloud.dataproc.v1.HadoopJob in project java-dataproc by googleapis.
the class InstantiateInlineWorkflowTemplate method instantiateInlineWorkflowTemplate.
public static void instantiateInlineWorkflowTemplate(String projectId, String region) throws IOException, InterruptedException {
String myEndpoint = String.format("%s-dataproc.googleapis.com:443", region);
// Configure the settings for the workflow template service client.
WorkflowTemplateServiceSettings workflowTemplateServiceSettings = WorkflowTemplateServiceSettings.newBuilder().setEndpoint(myEndpoint).build();
// Create a workflow template client with the configured settings. Using a try-with-resources block closes the client, but this can also be done manually with the .close() method.
try (WorkflowTemplateServiceClient workflowTemplateServiceClient = WorkflowTemplateServiceClient.create(workflowTemplateServiceSettings)) {
// Configure the jobs within the workflow.
HadoopJob teragenHadoopJob = HadoopJob.newBuilder().setMainJarFileUri("file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar").addArgs("teragen").addArgs("1000").addArgs("hdfs:///gen/").build();
OrderedJob teragen = OrderedJob.newBuilder().setHadoopJob(teragenHadoopJob).setStepId("teragen").build();
HadoopJob terasortHadoopJob = HadoopJob.newBuilder().setMainJarFileUri("file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar").addArgs("terasort").addArgs("hdfs:///gen/").addArgs("hdfs:///sort/").build();
OrderedJob terasort = OrderedJob.newBuilder().setHadoopJob(terasortHadoopJob).addPrerequisiteStepIds("teragen").setStepId("terasort").build();
// Configure the cluster placement for the workflow.
// Leave "ZoneUri" empty for "Auto Zone Placement".
// GceClusterConfig gceClusterConfig =
// GceClusterConfig.newBuilder().setZoneUri("").build();
GceClusterConfig gceClusterConfig = GceClusterConfig.newBuilder().setZoneUri("us-central1-a").build();
ClusterConfig clusterConfig = ClusterConfig.newBuilder().setGceClusterConfig(gceClusterConfig).build();
ManagedCluster managedCluster = ManagedCluster.newBuilder().setClusterName("my-managed-cluster").setConfig(clusterConfig).build();
WorkflowTemplatePlacement workflowTemplatePlacement = WorkflowTemplatePlacement.newBuilder().setManagedCluster(managedCluster).build();
// Create the inline workflow template.
WorkflowTemplate workflowTemplate = WorkflowTemplate.newBuilder().addJobs(teragen).addJobs(terasort).setPlacement(workflowTemplatePlacement).build();
// Submit the instantiated inline workflow template request.
String parent = RegionName.format(projectId, region);
OperationFuture<Empty, WorkflowMetadata> instantiateInlineWorkflowTemplateAsync = workflowTemplateServiceClient.instantiateInlineWorkflowTemplateAsync(parent, workflowTemplate);
instantiateInlineWorkflowTemplateAsync.get();
// Print out a success message.
System.out.println("Workflow ran successfully.");
} catch (ExecutionException e) {
System.err.println(String.format("Error running workflow: %s ", e.getMessage()));
}
}
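For reference, a minimal invocation sketch of the method above; this is not part of the original sample, and the project ID and region values are placeholders.
// Hypothetical entry point; "my-project-id" and "us-central1" are placeholder values.
public static void main(String[] args) throws IOException, InterruptedException {
String projectId = "my-project-id";
String region = "us-central1";
instantiateInlineWorkflowTemplate(projectId, region);
}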
use of com.google.cloud.dataproc.v1.HadoopJob in project java-dataproc by googleapis.
the class SubmitHadoopFsJob method submitHadoopFsJob.
public static void submitHadoopFsJob(String projectId, String region, String clusterName, String hadoopFsQuery) throws IOException, InterruptedException {
String myEndpoint = String.format("%s-dataproc.googleapis.com:443", region);
// Configure the settings for the job controller client.
JobControllerSettings jobControllerSettings = JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();
// Create a job controller client with the configured settings. Using a try-with-resources block closes the client, but this can also be done manually with the .close() method.
try (JobControllerClient jobControllerClient = JobControllerClient.create(jobControllerSettings)) {
// Configure cluster placement for the job.
JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
// Configure Hadoop job settings. The HadoopFS query is split into job arguments via the stringToList helper (a sketch of that helper follows this method).
HadoopJob hadoopJob = HadoopJob.newBuilder().setMainClass("org.apache.hadoop.fs.FsShell").addAllArgs(stringToList(hadoopFsQuery)).build();
Job job = Job.newBuilder().setPlacement(jobPlacement).setHadoopJob(hadoopJob).build();
// Submit an asynchronous request to execute the job.
OperationFuture<Job, JobMetadata> submitJobAsOperationAsyncRequest = jobControllerClient.submitJobAsOperationAsync(projectId, region, job);
Job response = submitJobAsOperationAsyncRequest.get();
// Fetch and print the job driver output from Google Cloud Storage.
// The driver output URI has the form gs://<bucket>/<object>; matches() must be called before the capture groups can be read.
Matcher matches = Pattern.compile("gs://(.*?)/(.*)").matcher(response.getDriverOutputResourceUri());
matches.matches();
Storage storage = StorageOptions.getDefaultInstance().getService();
Blob blob = storage.get(matches.group(1), String.format("%s.000000000", matches.group(2)));
System.out.println(String.format("Job finished successfully: %s", new String(blob.getContent())));
} catch (ExecutionException e) {
// If the job does not complete successfully, print the error message.
System.err.println(String.format("submitHadoopFSJob: %s ", e.getMessage()));
}
}
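The snippet above passes the HadoopFS query through a stringToList helper that is not shown here. Below is a minimal sketch of one possible implementation, assuming the helper simply splits the query string on spaces; it requires java.util.Arrays and java.util.List imports.
// Possible stringToList helper (an assumption; the original project defines its own version).
private static List<String> stringToList(String s) {
return Arrays.asList(s.split(" "));
}
With such a helper in place, submitHadoopFsJob can be called with a query string such as "-ls /" to list the root of the cluster's HDFS filesystem.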