Example 6 with Job

Use of com.google.api.services.dataflow.model.Job in project beam by apache.

From class DataflowPipelineTranslatorTest, method testWorkerMachineTypeConfig.

@Test
public void testWorkerMachineTypeConfig() throws IOException {
    final String testMachineType = "test-machine-type";
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setWorkerMachineType(testMachineType);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
    assertEquals(testMachineType, workerPool.getMachineType());
}
Also used: WorkerPool (com.google.api.services.dataflow.model.WorkerPool), DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), Job (com.google.api.services.dataflow.model.Job), DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage), Pipeline (org.apache.beam.sdk.Pipeline), Test (org.junit.Test)

Example 7 with Job

Use of com.google.api.services.dataflow.model.Job in project beam by apache.

From class DataflowPipelineTranslatorTest, method testDiskSizeGbConfig.

@Test
public void testDiskSizeGbConfig() throws IOException {
    final Integer diskSizeGb = 1234;
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setDiskSizeGb(diskSizeGb);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), Job (com.google.api.services.dataflow.model.Job), DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage), Pipeline (org.apache.beam.sdk.Pipeline), Test (org.junit.Test)
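
The two tests above show how per-worker settings flow from DataflowPipelineOptions into the generated Job. For comparison, here is a minimal sketch of how the same options might be configured in application code; the class name and the argument values are illustrative assumptions, only the setters come from the tests above:

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class WorkerConfigExample {

    public static DataflowPipelineOptions configureWorkers(String[] args) {
        // Parse command-line flags and view them as Dataflow-specific options.
        DataflowPipelineOptions options =
                PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
        // The same setters the tests exercise; these values are placeholders.
        options.setWorkerMachineType("n1-standard-4");
        options.setDiskSizeGb(250);
        return options;
    }
}

During translation these settings land on the WorkerPool in the job's Environment, which is exactly what the assertions above verify.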

Example 8 with Job

Use of com.google.api.services.dataflow.model.Job in project beam by apache.

From class DataflowPipelineJob, method getJobWithRetries.

/**
   * Attempts to get the underlying {@link Job}. Retries with exponential backoff on failure, up
   * to the maximum number of attempts permitted by the supplied {@link BackOff}.
   *
   * @param backoff the {@link BackOff} used to control retries.
   * @param sleeper object used to sleep between attempts.
   * @return the underlying {@link Job} object.
   * @throws IOException when the maximum number of retries is exhausted; the last exception
   * encountered is rethrown.
   */
private Job getJobWithRetries(BackOff backoff, Sleeper sleeper) throws IOException {
    // Retry loop ends in return or throw
    while (true) {
        try {
            Job job = dataflowClient.getJob(jobId);
            State currentState = MonitoringUtil.toState(job.getCurrentState());
            if (currentState.isTerminal()) {
                terminalState = currentState;
                replacedByJob = new DataflowPipelineJob(dataflowClient, job.getReplacedByJobId(), dataflowOptions, transformStepNames);
            }
            return job;
        } catch (IOException exn) {
            LOG.warn("There were problems getting current job status: {}.", exn.getMessage());
            LOG.debug("Exception information:", exn);
            if (!nextBackOff(sleeper, backoff)) {
                throw exn;
            }
        }
    }
}
Also used: IOException (java.io.IOException), Job (com.google.api.services.dataflow.model.Job)
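
The nextBackOff helper referenced in the catch block is not part of this excerpt. A minimal sketch of such a helper, assuming the com.google.api.client.util backoff utilities that BackOff and Sleeper already come from (Beam's actual implementation may differ in detail):

private static boolean nextBackOff(Sleeper sleeper, BackOff backoff) throws IOException {
    try {
        // BackOffUtils.next(...) sleeps for the next backoff interval and returns
        // false once the policy signals BackOff.STOP, i.e. retries are exhausted.
        return com.google.api.client.util.BackOffUtils.next(sleeper, backoff);
    } catch (InterruptedException e) {
        // Preserve the interrupt status; the retry loop above treats the failure
        // like any other IOException and rethrows it.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}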

Example 9 with Job

Use of com.google.api.services.dataflow.model.Job in project beam by apache.

From class DataflowPipelineTranslator, method translate.

/**
   * Translates a {@link Pipeline} into a {@code JobSpecification}.
   */
public JobSpecification translate(Pipeline pipeline, DataflowRunner runner, List<DataflowPackage> packages) {
    Translator translator = new Translator(pipeline, runner);
    Job result = translator.translate(packages);
    return new JobSpecification(result, Collections.unmodifiableMap(translator.stepNames));
}
Also used: Job (com.google.api.services.dataflow.model.Job)
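
The returned JobSpecification couples the generated Job with the transform-to-step-name map built during translation. A minimal sketch of how a caller unpacks it, mirroring the tests in this listing; getStepNames() is assumed here as the accessor for the step-name map:

JobSpecification spec = DataflowPipelineTranslator.fromOptions(options)
        .translate(pipeline, DataflowRunner.fromOptions(options),
                Collections.<DataflowPackage>emptyList());
// The Dataflow job proto that would be submitted to the service.
Job job = spec.getJob();
// Assumed accessor for the unmodifiable step-name map constructed above.
Map<AppliedPTransform<?, ?, ?>, String> stepNames = spec.getStepNames();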

Example 10 with Job

Use of com.google.api.services.dataflow.model.Job in project beam by apache.

From class DataflowPipelineTranslatorTest, method testTaggedNamesOverridden.

/**
   * Test that, in translation, the names for the output collections of a multi-output ParDo (a
   * special case, because the user can name the tags) are overridden to be what the Dataflow
   * service expects.
   */
@Test
public void testTaggedNamesOverridden() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(false);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    TupleTag<Integer> tag1 = new TupleTag<Integer>("frazzle") {
    };
    TupleTag<Integer> tag2 = new TupleTag<Integer>("bazzle") {
    };
    TupleTag<Integer> tag3 = new TupleTag<Integer>() {
    };
    PCollectionTuple outputs = pipeline.apply(Create.of(3)).apply(ParDo.of(new DoFn<Integer, Integer>() {

        @ProcessElement
        public void drop() {
        }
    }).withOutputTags(tag1, TupleTagList.of(tag2).and(tag3)));
    outputs.get(tag1).setName("bizbazzle");
    outputs.get(tag2).setName("gonzaggle");
    outputs.get(tag3).setName("froonazzle");
    runner.replaceTransforms(pipeline);
    Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
    // The ParDo step
    Step step = job.getSteps().get(1);
    String stepName = Structs.getString(step.getProperties(), PropertyNames.USER_NAME);
    List<Map<String, Object>> outputInfos = Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null);
    assertThat(outputInfos.size(), equalTo(3));
    // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
    for (int i = 0; i < outputInfos.size(); ++i) {
        assertThat(Structs.getString(outputInfos.get(i), PropertyNames.USER_NAME), equalTo(String.format("%s.out%s", stepName, i)));
    }
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), TupleTag (org.apache.beam.sdk.values.TupleTag), Step (com.google.api.services.dataflow.model.Step), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), Pipeline (org.apache.beam.sdk.Pipeline), PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple), Job (com.google.api.services.dataflow.model.Job), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage), Test (org.junit.Test)
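
Concretely: if the translated ParDo step's USER_NAME is s2, the loop expects the three OUTPUT_INFO user names to be s2.out0, s2.out1, and s2.out2; neither the tag ids (frazzle, bazzle) nor the setName(...) values (bizbazzle, gonzaggle, froonazzle) survive translation.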

Aggregations

Job (com.google.api.services.dataflow.model.Job): 47
Test (org.junit.Test): 39
DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 31
Pipeline (org.apache.beam.sdk.Pipeline): 26
DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage): 22
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 13
DataflowRunner.getContainerImageForJob (org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob): 11
Step (com.google.api.services.dataflow.model.Step): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 6
IOException (java.io.IOException): 6
Map (java.util.Map): 6
Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject): 6
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 6
Dataflow (com.google.api.services.dataflow.Dataflow): 5
State (org.apache.beam.sdk.PipelineResult.State): 3
GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException): 2
JobMetrics (com.google.api.services.dataflow.model.JobMetrics): 2
ListJobsResponse (com.google.api.services.dataflow.model.ListJobsResponse): 2
WorkerPool (com.google.api.services.dataflow.model.WorkerPool): 2
ImmutableList (com.google.common.collect.ImmutableList): 2