use of com.google.api.services.dataflow.model.Job in project beam by apache.
the class DataflowPipelineTranslatorTest method testWorkerMachineTypeConfig.
@Test
public void testWorkerMachineTypeConfig() throws IOException {
final String testMachineType = "test-machine-type";
DataflowPipelineOptions options = buildPipelineOptions();
options.setWorkerMachineType(testMachineType);
Pipeline p = buildPipeline(options);
p.traverseTopologically(new RecordingPipelineVisitor());
Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
assertEquals(1, job.getEnvironment().getWorkerPools().size());
WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
assertEquals(testMachineType, workerPool.getMachineType());
}
use of com.google.api.services.dataflow.model.Job in project beam by apache.
the class DataflowPipelineTranslatorTest method testDiskSizeGbConfig.
@Test
public void testDiskSizeGbConfig() throws IOException {
final Integer diskSizeGb = 1234;
DataflowPipelineOptions options = buildPipelineOptions();
options.setDiskSizeGb(diskSizeGb);
Pipeline p = buildPipeline(options);
p.traverseTopologically(new RecordingPipelineVisitor());
Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
assertEquals(1, job.getEnvironment().getWorkerPools().size());
assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
use of com.google.api.services.dataflow.model.Job in project beam by apache.
the class DataflowPipelineJob method getJobWithRetries.
/**
* Attempts to get the underlying {@link Job}. Uses exponential backoff on failure up to the
* maximum number of passed in attempts.
*
* @param backoff the {@link BackOff} used to control retries.
* @param sleeper Object used to do the sleeps between attempts.
* @return The underlying {@link Job} object.
* @throws IOException When the maximum number of retries is exhausted, the last exception is
* thrown.
*/
private Job getJobWithRetries(BackOff backoff, Sleeper sleeper) throws IOException {
// Retry loop ends in return or throw
while (true) {
try {
Job job = dataflowClient.getJob(jobId);
State currentState = MonitoringUtil.toState(job.getCurrentState());
if (currentState.isTerminal()) {
terminalState = currentState;
replacedByJob = new DataflowPipelineJob(dataflowClient, job.getReplacedByJobId(), dataflowOptions, transformStepNames);
}
return job;
} catch (IOException exn) {
LOG.warn("There were problems getting current job status: {}.", exn.getMessage());
LOG.debug("Exception information:", exn);
if (!nextBackOff(sleeper, backoff)) {
throw exn;
}
}
}
}
use of com.google.api.services.dataflow.model.Job in project beam by apache.
the class DataflowPipelineTranslator method translate.
/**
* Translates a {@link Pipeline} into a {@code JobSpecification}.
*/
public JobSpecification translate(Pipeline pipeline, DataflowRunner runner, List<DataflowPackage> packages) {
Translator translator = new Translator(pipeline, runner);
Job result = translator.translate(packages);
return new JobSpecification(result, Collections.unmodifiableMap(translator.stepNames));
}
use of com.google.api.services.dataflow.model.Job in project beam by apache.
the class DataflowPipelineTranslatorTest method testTaggedNamesOverridden.
/**
* Test that in translation the name for collections of a multi-output ParDo - a special case
* because the user can name tags - are overridden to be what the Dataflow service expects.
*/
@Test
public void testTaggedNamesOverridden() throws Exception {
DataflowPipelineOptions options = buildPipelineOptions();
DataflowRunner runner = DataflowRunner.fromOptions(options);
options.setStreaming(false);
DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
Pipeline pipeline = Pipeline.create(options);
TupleTag<Integer> tag1 = new TupleTag<Integer>("frazzle") {
};
TupleTag<Integer> tag2 = new TupleTag<Integer>("bazzle") {
};
TupleTag<Integer> tag3 = new TupleTag<Integer>() {
};
PCollectionTuple outputs = pipeline.apply(Create.of(3)).apply(ParDo.of(new DoFn<Integer, Integer>() {
@ProcessElement
public void drop() {
}
}).withOutputTags(tag1, TupleTagList.of(tag2).and(tag3)));
outputs.get(tag1).setName("bizbazzle");
outputs.get(tag2).setName("gonzaggle");
outputs.get(tag3).setName("froonazzle");
runner.replaceTransforms(pipeline);
Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
// The ParDo step
Step step = job.getSteps().get(1);
String stepName = Structs.getString(step.getProperties(), PropertyNames.USER_NAME);
List<Map<String, Object>> outputInfos = Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null);
assertThat(outputInfos.size(), equalTo(3));
// The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
for (int i = 0; i < outputInfos.size(); ++i) {
assertThat(Structs.getString(outputInfos.get(i), PropertyNames.USER_NAME), equalTo(String.format("%s.out%s", stepName, i)));
}
}
Aggregations