
Example 66 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in the apache/beam project.

From the class BeamEnumerableConverter, method count.

private static Enumerable<Object> count(PipelineOptions options, BeamRelNode node) {
    Pipeline pipeline = Pipeline.create(options);
    // Attach a RowCounter to the relational node's output so every row bumps a metric.
    BeamSqlRelUtils.toPCollection(pipeline, node).apply(ParDo.of(new RowCounter()));
    PipelineResult result = pipeline.run();
    long count = 0;
    if (!containsUnboundedPCollection(pipeline)) {
        // Bounded pipelines terminate, so block until the run completes.
        if (PipelineResult.State.FAILED.equals(result.waitUntilFinish())) {
            throw new RuntimeException("Pipeline failed for unknown reason");
        }
        // Query the "rows" counter back from the result and read its attempted value.
        MetricQueryResults metrics =
            result
                .metrics()
                .queryMetrics(
                    MetricsFilter.builder()
                        .addNameFilter(
                            MetricNameFilter.named(BeamEnumerableConverter.class, "rows"))
                        .build());
        Iterator<MetricResult<Long>> iterator = metrics.getCounters().iterator();
        if (iterator.hasNext()) {
            count = iterator.next().getAttempted();
        }
    }
    return Linq4j.singletonEnumerable(count);
}
Also used: MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults), PipelineResult (org.apache.beam.sdk.PipelineResult), MetricResult (org.apache.beam.sdk.metrics.MetricResult), Pipeline (org.apache.beam.sdk.Pipeline)
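
For context, a minimal self-contained sketch of the same metrics pattern follows. The class name, counter name, and input data here are hypothetical, and a runner such as the DirectRunner is assumed on the classpath:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.metrics.MetricsFilter;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;

public class CounterExample {
    // Hypothetical DoFn that bumps a counter for every element it sees.
    static class CountingFn extends DoFn<Integer, Integer> {
        private final Counter elements = Metrics.counter(CounterExample.class, "elements");

        @ProcessElement
        public void processElement(ProcessContext c) {
            elements.inc();
            c.output(c.element());
        }
    }

    public static void main(String[] args) {
        Pipeline pipeline = Pipeline.create();
        pipeline.apply(Create.of(1, 2, 3)).apply(ParDo.of(new CountingFn()));

        PipelineResult result = pipeline.run();
        result.waitUntilFinish();

        // Query the counter back from the result, just as BeamEnumerableConverter does.
        MetricQueryResults metrics =
            result
                .metrics()
                .queryMetrics(
                    MetricsFilter.builder()
                        .addNameFilter(MetricNameFilter.named(CounterExample.class, "elements"))
                        .build());
        for (MetricResult<Long> counter : metrics.getCounters()) {
            System.out.println(counter.getName() + " = " + counter.getAttempted());
        }
    }
}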

Example 67 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in the apache/beam project.

From the class SpannerWriteIT, method testSequentialWrite.

@Test
public void testSequentialWrite() throws Exception {
    int numRecords = 100;
    // First step: write rows [0, numRecords) to the table.
    SpannerWriteResult stepOne =
        p.apply("first step", GenerateSequence.from(0).to(numRecords))
            .apply(ParDo.of(new GenerateMutations(options.getTable())))
            .apply(SpannerIO.write()
                .withProjectId(project)
                .withInstanceId(options.getInstanceId())
                .withDatabaseId(databaseName));
    // Second step: Wait.on defers this write until the first one has finished.
    p.apply("second step", GenerateSequence.from(numRecords).to(2 * numRecords))
        .apply("Gen mutations", ParDo.of(new GenerateMutations(options.getTable())))
        .apply(Wait.on(stepOne.getOutput()))
        .apply("write to table2", SpannerIO.write()
            .withProjectId(project)
            .withInstanceId(options.getInstanceId())
            .withDatabaseId(databaseName));
    PipelineResult result = p.run();
    result.waitUntilFinish();
    assertThat(result.getState(), is(PipelineResult.State.DONE));
    assertThat(countNumberOfRecords(), equalTo(2L * numRecords));
}
Also used: PipelineResult (org.apache.beam.sdk.PipelineResult), Test (org.junit.Test)
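
The key trick above is Wait.on, which gates one branch of a pipeline on another branch's completion. A minimal sketch of that pattern follows; the FakeWriteFn is a hypothetical stand-in for a real sink such as SpannerIO:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Wait;
import org.apache.beam.sdk.values.PCollection;

public class SequentialStepsExample {
    // Hypothetical stand-in for a real sink: passes elements through after "writing" them.
    static class FakeWriteFn extends DoFn<String, String> {
        private final String sink;

        FakeWriteFn(String sink) {
            this.sink = sink;
        }

        @ProcessElement
        public void processElement(ProcessContext c) {
            System.out.println("wrote " + c.element() + " to " + sink);
            c.output(c.element());
        }
    }

    public static void main(String[] args) {
        Pipeline p = Pipeline.create();

        // Step one produces an output PCollection that doubles as a completion signal.
        PCollection<String> stepOne =
            p.apply("First batch", Create.of("a", "b", "c"))
                .apply("Write step one", ParDo.of(new FakeWriteFn("table1")));

        // Wait.on holds back step two until step one's output is complete,
        // mirroring how the Spanner test sequences its two writes.
        p.apply("Second batch", Create.of("d", "e", "f"))
            .apply(Wait.on(stepOne))
            .apply("Write step two", ParDo.of(new FakeWriteFn("table2")));

        p.run().waitUntilFinish();
    }
}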

Example 68 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in the apache/beam project.

From the class ReadWriteIT, method testReadWrite.

@Test
public void testReadWrite() throws Exception {
    pipeline.getOptions().as(StreamingOptions.class).setStreaming(true);
    pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);
    TopicPath topic = createTopic(getProject(pipeline.getOptions()));
    SubscriptionPath subscription = null;
    Exception lastException = null;
    for (int i = 0; i < 30; ++i) {
        // Sleep for topic creation to propagate.
        Thread.sleep(1000);
        try {
            subscription = createSubscription(topic);
            break;
        } catch (Exception e) {
            lastException = e;
            LOG.info("Retrying exception on subscription creation.", e);
        }
    }
    if (subscription == null) {
        throw lastException;
    }
    // Publish some messages
    writeMessages(topic, pipeline);
    // Read some messages. They should be deduplicated by the time we see them, so there
    // should be exactly MESSAGE_COUNT of them, one for every index in [0, MESSAGE_COUNT).
    PCollection<SequencedMessage> messages = readMessages(subscription, pipeline);
    PCollection<Integer> ids = messages.apply(MapElements.via(extractIds()));
    ids.apply("PubsubSignalTest", signal.signalSuccessWhen(BigEndianIntegerCoder.of(), testIds()));
    Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
    pipeline.apply(signal.signalStart());
    PipelineResult job = pipeline.run();
    start.get();
    LOG.info("Running!");
    signal.waitForSuccess(Duration.standardMinutes(5));
    // A runner may not support cancel.
    try {
        job.cancel();
    } catch (UnsupportedOperationException exc) {
        // noop
    }
}
Also used: TopicPath (com.google.cloud.pubsublite.TopicPath), SubscriptionPath (com.google.cloud.pubsublite.SubscriptionPath), StreamingOptions (org.apache.beam.sdk.options.StreamingOptions), PipelineResult (org.apache.beam.sdk.PipelineResult), TestPipelineOptions (org.apache.beam.sdk.testing.TestPipelineOptions), SequencedMessage (com.google.cloud.pubsublite.proto.SequencedMessage), Test (org.junit.Test)
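
A stripped-down sketch of the same non-blocking run-then-cancel pattern follows; the streaming source is elided, and the five-minute bound is an illustrative assumption:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.StreamingOptions;
import org.joda.time.Duration;

public class StreamingCancelExample {
    public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
        // Run in streaming mode, as the test above does.
        options.as(StreamingOptions.class).setStreaming(true);

        Pipeline pipeline = Pipeline.create(options);
        // ... attach a streaming source and transforms here ...

        PipelineResult job = pipeline.run();
        // Bound how long the streaming job may run; waitUntilFinish with a
        // timeout returns null if the job has not yet reached a terminal state.
        job.waitUntilFinish(Duration.standardMinutes(5));
        try {
            job.cancel();
        } catch (UnsupportedOperationException exc) {
            // Some runners do not support cancel; ignore, as the test above does.
        }
    }
}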

Example 69 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in the apache/beam project.

From the class InfluxDbIOTest, method validateWriteTest.

@Test
public void validateWriteTest() {
    InfluxDB influxDb = Mockito.mock(InfluxDB.class);
    // Route both InfluxDBFactory.connect overloads to the mock.
    PowerMockito.when(InfluxDBFactory.connect(
            anyString(), anyString(), anyString(), any(OkHttpClient.Builder.class)))
        .thenReturn(influxDb);
    PowerMockito.when(InfluxDBFactory.connect(anyString(), anyString(), anyString()))
        .thenReturn(influxDb);
    String influxHost = "http://localhost";
    String userName = "admin";
    String password = "admin";
    String influxDatabaseName = "testDataBase";
    // Count every batched write the sink issues against the mock.
    AtomicInteger countInvocation = new AtomicInteger();
    Mockito.doAnswer(invocation -> countInvocation.getAndIncrement())
        .when(influxDb).write(any(List.class));
    doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW DATABASES"));
    final int numOfElementsToWrite = 1000;
    pipeline
        .apply("Generate data", Create.of(GenerateData.getMetric("test_m", numOfElementsToWrite)))
        .apply("Write data to InfluxDB",
            InfluxDbIO.write()
                .withDataSourceConfiguration(DataSourceConfiguration.create(
                    StaticValueProvider.of(influxHost),
                    StaticValueProvider.of(userName),
                    StaticValueProvider.of(password)))
                .withDatabase(influxDatabaseName));
    PipelineResult result = pipeline.run();
    Assert.assertEquals(State.DONE, result.waitUntilFinish());
    Assert.assertTrue(countInvocation.get() > 0);
}
Also used: Query (org.influxdb.dto.Query), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), InfluxDB (org.influxdb.InfluxDB), PipelineResult (org.apache.beam.sdk.PipelineResult), ArrayList (java.util.ArrayList), List (java.util.List), ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString), PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest), Test (org.junit.Test)
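
A minimal sketch of the same run-and-assert-terminal-state pattern follows, using TestPipeline and PAssert with hypothetical input data; the DirectRunner is assumed:

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.PipelineResult.State;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;

public class PipelineStateTest {
    @Rule public final transient TestPipeline pipeline = TestPipeline.create();

    @Test
    public void runsToDone() {
        PCollection<Long> count =
            pipeline.apply(Create.of("a", "b", "c")).apply(Count.globally());
        PAssert.thatSingleton(count).isEqualTo(3L);

        // waitUntilFinish() returns the terminal state, which we assert directly.
        PipelineResult result = pipeline.run();
        Assert.assertEquals(State.DONE, result.waitUntilFinish());
    }
}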

Example 70 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in the apache/beam project.

From the class HadoopFormatIOIT, method writeAndReadUsingHadoopFormat.

@Test
public void writeAndReadUsingHadoopFormat() {
    // Write: generate rows, break fusion, and store them via Hadoop's DBOutputFormat.
    writePipeline
        .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRows))
        .apply("Produce db rows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
        .apply("Prevent fusion before writing", Reshuffle.viaRandomKey())
        .apply("Collect write time", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
        .apply("Construct rows for DBOutputFormat", ParDo.of(new ConstructDBOutputFormatRowFn()))
        .apply("Write using Hadoop OutputFormat",
            HadoopFormatIO.<TestRowDBWritable, NullWritable>write()
                .withConfiguration(hadoopConfiguration.get())
                .withPartitioning()
                .withExternalSynchronization(
                    new HDFSSynchronization(tmpFolder.getRoot().getAbsolutePath())));
    PipelineResult writeResult = writePipeline.run();
    writeResult.waitUntilFinish();
    // Read everything back and reduce it to a single hash for comparison.
    PCollection<String> consolidatedHashcode =
        readPipeline
            .apply("Read using Hadoop InputFormat",
                HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                    .withConfiguration(hadoopConfiguration.get()))
            .apply("Collect read time", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
            .apply("Get values only", Values.create())
            .apply("Values as string", ParDo.of(new TestRow.SelectNameFn()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()));
    PAssert.thatSingleton(consolidatedHashcode).isEqualTo(getExpectedHashForRowCount(numberOfRows));
    PipelineResult readResult = readPipeline.run();
    readResult.waitUntilFinish();
    if (!options.isWithTestcontainers()) {
        collectAndPublishMetrics(writeResult, readResult);
    }
}
Also used: TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor), TestRow (org.apache.beam.sdk.io.common.TestRow), PipelineResult (org.apache.beam.sdk.PipelineResult), NullWritable (org.apache.hadoop.io.NullWritable), HashingFn (org.apache.beam.sdk.io.common.HashingFn), LongWritable (org.apache.hadoop.io.LongWritable), Test (org.junit.Test)
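
A pared-down sketch of the same write-pipeline-then-read-pipeline sequencing follows, swapping HadoopFormatIO for TextIO to stay self-contained; the output path is a hypothetical placeholder and the DirectRunner is assumed:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class WriteThenReadExample {
    public static void main(String[] args) {
        String path = "/tmp/write-then-read";  // hypothetical output location

        // Phase 1: run the write pipeline to completion before reading.
        Pipeline writePipeline = Pipeline.create();
        writePipeline.apply(Create.of("a", "b", "c")).apply(TextIO.write().to(path));
        PipelineResult writeResult = writePipeline.run();
        writeResult.waitUntilFinish();

        // Phase 2: a separate pipeline reads what phase 1 wrote and verifies it.
        Pipeline readPipeline = Pipeline.create();
        PCollection<Long> count =
            readPipeline.apply(TextIO.read().from(path + "*")).apply(Count.globally());
        PAssert.thatSingleton(count).isEqualTo(3L);
        readPipeline.run().waitUntilFinish();
    }
}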

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult): 105 usages
Test (org.junit.Test): 66 usages
Pipeline (org.apache.beam.sdk.Pipeline): 29 usages
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18 usages
PCollection (org.apache.beam.sdk.values.PCollection): 18 usages
TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14 usages
ArrayList (java.util.ArrayList): 12 usages
Category (org.junit.experimental.categories.Category): 12 usages
KV (org.apache.beam.sdk.values.KV): 11 usages
Rule (org.junit.Rule): 11 usages
IOException (java.io.IOException): 10 usages
ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10 usages
DoFn (org.apache.beam.sdk.transforms.DoFn): 10 usages
HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9 usages
RunWith (org.junit.runner.RunWith): 9 usages
MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8 usages
ParDo (org.apache.beam.sdk.transforms.ParDo): 8 usages
Duration (org.joda.time.Duration): 8 usages
Map (java.util.Map): 7 usages
TableReference (com.google.api.services.bigquery.model.TableReference): 6 usages