
Example 36 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From the class GcsKmsKeyIT, method testGcsWriteWithKmsKey.

/**
 * Tests writing to tempLocation with --dataflowKmsKey set on the command line. Verifies that the
 * resulting output uses the specified key and is readable. Does not verify any temporary files.
 *
 * <p>This test verifies that GCS file copies work with CMEK-enabled files.
 */
@Test
public void testGcsWriteWithKmsKey() {
    TestPipelineOptions options = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
    assertNotNull(options.getTempRoot());
    options.setTempLocation(options.getTempRoot() + "/testGcsWriteWithKmsKey");
    GcsOptions gcsOptions = options.as(GcsOptions.class);
    ResourceId filenamePrefix =
        FileSystems.matchNewResource(gcsOptions.getGcpTempLocation(), true)
            .resolve(
                String.format("GcsKmsKeyIT-%tF-%<tH-%<tM-%<tS-%<tL.output", new Date()),
                StandardResolveOptions.RESOLVE_FILE);
    Pipeline p = Pipeline.create(options);
    p.apply("ReadLines", TextIO.read().from(INPUT_FILE)).apply("WriteLines", TextIO.write().to(filenamePrefix));
    PipelineResult result = p.run();
    State state = result.waitUntilFinish();
    assertThat(state, equalTo(State.DONE));
    String filePattern = filenamePrefix + "*-of-*";
    assertThat(new NumberedShardedFile(filePattern), fileContentsHaveChecksum(EXPECTED_CHECKSUM));
    // Verify objects have KMS key set.
    try {
        MatchResult matchResult = Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(filePattern)));
        GcsUtil gcsUtil = gcsOptions.getGcsUtil();
        for (Metadata metadata : matchResult.metadata()) {
            String kmsKey = gcsUtil.getObject(GcsPath.fromUri(metadata.resourceId().toString())).getKmsKeyName();
            assertNotNull(kmsKey);
        }
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}
Also used : Metadata(org.apache.beam.sdk.io.fs.MatchResult.Metadata) PipelineResult(org.apache.beam.sdk.PipelineResult) IOException(java.io.IOException) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Date(java.util.Date) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) State(org.apache.beam.sdk.PipelineResult.State) NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) GcsUtil(org.apache.beam.sdk.extensions.gcp.util.GcsUtil) Test(org.junit.Test)
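
The test above relies on --dataflowKmsKey being supplied on the command line. For local experimentation the same option can usually be set in code before the pipeline is created; the following is a minimal sketch, assuming the dataflowKmsKey setter on org.apache.beam.sdk.extensions.gcp.options.GcpOptions and a placeholder key resource name (the project, key ring, and key are not real).

// Sketch only: set the CMEK key programmatically instead of via the command line.
// The key resource name is a placeholder.
TestPipelineOptions options = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
options.as(GcpOptions.class)
    .setDataflowKmsKey("projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key");
Pipeline p = Pipeline.create(options);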

Example 37 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From the class BeamEnumerableConverter, method runCollector.

private static void runCollector(PipelineOptions options, BeamRelNode node) {
    Pipeline pipeline = Pipeline.create(options);
    PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
    resultCollection.apply(ParDo.of(new Collector()));
    PipelineResult result = pipeline.run();
    if (PipelineResult.State.FAILED.equals(result.waitUntilFinish())) {
        throw new RuntimeException("Pipeline failed for unknown reason");
    }
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Row(org.apache.beam.sdk.values.Row) Pipeline(org.apache.beam.sdk.Pipeline)

Example 38 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From the class BeamEnumerableConverter, method limitRun.

private static PipelineResult limitRun(
        PipelineOptions options, BeamRelNode node, DoFn<Row, Void> doFn, Queue<Row> values, int limitCount) {
    options.as(DirectOptions.class).setBlockOnRun(false);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
    resultCollection.apply(ParDo.of(doFn));
    PipelineResult result = pipeline.run();
    State state;
    while (true) {
        // Check the pipeline state once per second.
        state = result.waitUntilFinish(Duration.standardSeconds(1));
        if (state != null && state.isTerminal()) {
            if (PipelineResult.State.FAILED.equals(state)) {
                throw new RuntimeException("Pipeline failed for unknown reason");
            }
            break;
        }
        try {
            if (values.size() >= limitCount) {
                result.cancel();
                break;
            }
        } catch (IOException e) {
            LOG.warn(e.toString());
            break;
        }
    }
    return result;
}
Also used : State(org.apache.beam.sdk.PipelineResult.State) PipelineResult(org.apache.beam.sdk.PipelineResult) Row(org.apache.beam.sdk.values.Row) IOException(java.io.IOException) DirectOptions(org.apache.beam.runners.direct.DirectOptions) Pipeline(org.apache.beam.sdk.Pipeline)
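
The polling loop above is tied to BeamEnumerableConverter's row limit. As a hypothetical generalization of the same non-blocking pattern (not Beam API; the helper name and deadline handling are assumptions), a utility could poll once per second and cancel the pipeline if it has not reached a terminal state before a deadline:

// Hypothetical helper: poll a non-blocking pipeline once per second and cancel it
// if it is still running when the deadline expires.
private static State waitOrCancel(PipelineResult result, Duration deadline) throws IOException {
    long remainingMillis = deadline.getMillis();
    while (remainingMillis > 0) {
        // With a timeout, waitUntilFinish may return null or a non-terminal state
        // if the pipeline is still running.
        State state = result.waitUntilFinish(Duration.standardSeconds(1));
        if (state != null && state.isTerminal()) {
            return state;
        }
        remainingMillis -= 1000;
    }
    result.cancel();
    return result.getState();
}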

Example 39 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From the class SpannerWriteIT, method testWrite.

@Test
public void testWrite() throws Exception {
    int numRecords = 100;
    p.apply(GenerateSequence.from(0).to(numRecords))
        .apply(ParDo.of(new GenerateMutations(options.getTable())))
        .apply(
            SpannerIO.write()
                .withProjectId(project)
                .withInstanceId(options.getInstanceId())
                .withDatabaseId(databaseName));
    PipelineResult result = p.run();
    result.waitUntilFinish();
    assertThat(result.getState(), is(PipelineResult.State.DONE));
    assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 40 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From the class SpannerWriteIT, method testReportFailures.

@Test
public void testReportFailures() throws Exception {
    int numRecords = 100;
    p.apply(GenerateSequence.from(0).to(2 * numRecords))
        .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
        .apply(
            SpannerIO.write()
                .withProjectId(project)
                .withInstanceId(options.getInstanceId())
                .withDatabaseId(databaseName)
                .withFailureMode(SpannerIO.FailureMode.REPORT_FAILURES));
    PipelineResult result = p.run();
    result.waitUntilFinish();
    assertThat(result.getState(), is(PipelineResult.State.DONE));
    assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)
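
Both Spanner tests depend on a GenerateMutations DoFn defined elsewhere in SpannerWriteIT and not shown here. The following is a minimal, hypothetical sketch of such a DoFn (the column names "Key" and "Value" are assumptions, and the real helper may differ); it turns each element of the generated sequence into a Cloud Spanner mutation consumed by SpannerIO.write():

// Hypothetical sketch, not the actual GenerateMutations from SpannerWriteIT.
// Uses com.google.cloud.spanner.Mutation and org.apache.beam.sdk.transforms.DoFn.
static class GenerateMutationsSketch extends DoFn<Long, Mutation> {
    private final String table;

    GenerateMutationsSketch(String table) {
        this.table = table;
    }

    @ProcessElement
    public void processElement(ProcessContext c) {
        long key = c.element();
        c.output(
            Mutation.newInsertOrUpdateBuilder(table)
                .set("Key").to(key)
                .set("Value").to("value-" + key)
                .build());
    }
}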

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult): 105
Test (org.junit.Test): 66
Pipeline (org.apache.beam.sdk.Pipeline): 29
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18
PCollection (org.apache.beam.sdk.values.PCollection): 18
TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14
ArrayList (java.util.ArrayList): 12
Category (org.junit.experimental.categories.Category): 12
KV (org.apache.beam.sdk.values.KV): 11
Rule (org.junit.Rule): 11
IOException (java.io.IOException): 10
ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10
DoFn (org.apache.beam.sdk.transforms.DoFn): 10
HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9
RunWith (org.junit.runner.RunWith): 9
MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8
ParDo (org.apache.beam.sdk.transforms.ParDo): 8
Duration (org.joda.time.Duration): 8
Map (java.util.Map): 7
TableReference (com.google.api.services.bigquery.model.TableReference): 6
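
Several of the co-occurrences counted above (MetricQueryResults, Duration, TimeMonitor) come from tests that inspect metrics on the PipelineResult after it finishes. A minimal sketch of that pattern, using a placeholder namespace and counter name rather than values from the examples above:

// Sketch: query a counter from a finished pipeline. The namespace and counter
// name are placeholders.
PipelineResult result = p.run();
result.waitUntilFinish();
MetricQueryResults metrics = result.metrics().queryMetrics(
    MetricsFilter.builder()
        .addNameFilter(MetricNameFilter.named("my-namespace", "my-counter"))
        .build());
for (MetricResult<Long> counter : metrics.getCounters()) {
    // getAttempted() is available on all runners; getCommitted() may not be supported.
    System.out.println(counter.getName() + ": " + counter.getAttempted());
}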