Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class TextIOIT, method writeThenReadAll.
@Test
public void writeThenReadAll() {
  TextIO.TypedWrite<String, Object> write =
      TextIO.write()
          .to(filenamePrefix)
          .withOutputFilenames()
          .withCompression(compressionType);
  if (numShards != null) {
    write = write.withNumShards(numShards);
  }

  PCollection<String> testFilenames =
      pipeline
          .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
          .apply(
              "Produce text lines",
              ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
          .apply(
              "Collect write start time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "startTime")))
          .apply("Write content to files", write)
          .getPerDestinationOutputFilenames()
          .apply(Values.create())
          .apply(
              "Collect write end time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "middleTime")));

  PCollection<String> consolidatedHashcode =
      testFilenames
          .apply("Match all files", FileIO.matchAll())
          .apply(
              "Read matches",
              FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT))
          .apply("Read files", TextIO.readFiles())
          .apply(
              "Collect read end time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "endTime")))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  testFilenames.apply(
      "Delete test files",
      ParDo.of(new DeleteFileFn())
          .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result);
}
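collectAndPublishMetrics is a private helper of TextIOIT that is not shown above. A minimal sketch of what such a helper could look like, assuming the MetricsReader utility from beam-sdks-java-test-utils and the metric names recorded by the TimeMonitor steps; the duration arithmetic and variable names are illustrative, not the actual implementation:

private void collectAndPublishMetrics(PipelineResult result) {
  // Hypothetical sketch, not the real TextIOIT helper. TimeMonitor records
  // System.currentTimeMillis() per element into a distribution, so the
  // distribution's min/max approximate when a phase started and ended.
  MetricsReader reader = new MetricsReader(result, FILEIOIT_NAMESPACE);
  long writeStart = reader.getStartTimeMetric("startTime");
  long writeEnd = reader.getEndTimeMetric("middleTime");
  long readEnd = reader.getEndTimeMetric("endTime");
  double writeTimeSec = (writeEnd - writeStart) / 1000.0; // write phase duration
  double totalTimeSec = (readEnd - writeStart) / 1000.0; // write + read duration
  // Publishing the values (e.g. to a metrics database) is omitted in this sketch.
}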
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class JdbcIOAutoPartitioningIT, method prepareDatabase.
@Before
public void prepareDatabase() throws SQLException {
  pipelineRead.getOptions().setStableUniqueNames(CheckEnabled.OFF);
  DataSource dbDs = DatabaseTestHelper.getDataSourceForContainer(getDb(dbms));
  try {
    DatabaseTestHelper.createTable(
        dbDs,
        TABLE_NAME,
        Lists.newArrayList(
            KV.of("id", "INTEGER"),
            KV.of("name", "VARCHAR(50)"),
            KV.of("specialDate", "TIMESTAMP")));
  } catch (SQLException e) {
    LOG.info(
        "Exception occurred when preparing database {}. "
            + "This is expected, and the test should pass.",
        dbms,
        e);
    return;
  }

  final String dbmsLocal = dbms;
  pipelineWrite
      .apply(GenerateSequence.from(0).to(NUM_ROWS))
      .apply(MapElements.via(new MapRowDataFn()))
      .apply(
          JdbcIO.<RowData>write()
              .withTable(TABLE_NAME)
              .withDataSourceProviderFn(
                  voide -> DatabaseTestHelper.getDataSourceForContainer(getDb(dbmsLocal))));

  PipelineResult res = pipelineWrite.run();
  res.metrics()
      .allMetrics()
      .getDistributions()
      .forEach(
          dist -> {
            if (dist.getName().getName().contains("intsDistribution")) {
              LOG.info(
                  "Metric: {} | Min: {} | Max: {}",
                  dist.getName().getName(),
                  dist.getCommitted().getMin(),
                  dist.getCommitted().getMax());
            } else if (dist.getName().getName().contains("datetimesDistribution")) {
              // Datetime distributions store epoch millis; convert back for logging.
              LOG.info(
                  "Metric: {} | Min: {} | Max: {}",
                  dist.getName().getName(),
                  new DateTime(Instant.EPOCH.plus(Duration.millis(dist.getCommitted().getMin()))),
                  new DateTime(Instant.EPOCH.plus(Duration.millis(dist.getCommitted().getMax()))));
            }
          });
  res.waitUntilFinish();
}
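Scanning allMetrics() works, but a PipelineResult can also be queried for a single metric. A short sketch using the standard MetricsFilter/MetricNameFilter API; the namespace string is a placeholder, since the snippet above does not show where the distributions are registered:

MetricQueryResults matched =
    res.metrics()
        .queryMetrics(
            MetricsFilter.builder()
                // The namespace here is a placeholder assumption.
                .addNameFilter(MetricNameFilter.named("someNamespace", "intsDistribution"))
                .build());
for (MetricResult<DistributionResult> dist : matched.getDistributions()) {
  LOG.info(
      "Metric: {} | Min: {} | Max: {}",
      dist.getName().getName(),
      dist.getCommitted().getMin(),
      dist.getCommitted().getMax());
}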
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class InfluxDbIOTest, method validateReadTest.
@Test
public void validateReadTest() {
  String influxHost = "http://localhost";
  String userName = "admin";
  String password = "admin";
  String influxDatabaseName = "testDataBase";
  InfluxDB influxDb = Mockito.mock(InfluxDB.class);
  PowerMockito.when(
          InfluxDBFactory.connect(
              anyString(), anyString(), anyString(), any(OkHttpClient.Builder.class)))
      .thenReturn(influxDb);
  PowerMockito.when(InfluxDBFactory.connect(anyString(), anyString(), anyString()))
      .thenReturn(influxDb);
  doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW DATABASES"));
  doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW SHARDS"));
  doReturn(mockResultForNumberAndSizeOfBlocks())
      .when(influxDb)
      .query(new Query("EXPLAIN SELECT * FROM cpu", influxDatabaseName));
  doReturn(mockResult("cpu", 20))
      .when(influxDb)
      .query(new Query("SELECT * FROM cpu", influxDatabaseName));

  PCollection<Long> data =
      pipeline
          .apply(
              "Read data from InfluxDB",
              InfluxDbIO.read()
                  .withDataSourceConfiguration(
                      DataSourceConfiguration.create(
                          StaticValueProvider.of(influxHost),
                          StaticValueProvider.of(userName),
                          StaticValueProvider.of(password)))
                  .withDatabase(influxDatabaseName)
                  .withQuery("SELECT * FROM cpu"))
          .apply(Count.globally());

  PAssert.that(data).containsInAnyOrder(20L);
  PipelineResult result = pipeline.run();
  Assert.assertEquals(State.DONE, result.waitUntilFinish());
}
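waitUntilFinish() blocks until the pipeline reaches a terminal state and returns it, which is what lets the assertion above compare directly against State.DONE. A bounded variant also exists; a small sketch, assuming joda-time's Duration (the five-minute timeout is arbitrary):

// Bounded wait: returns the terminal state, or null if the timeout elapses first.
PipelineResult.State state = result.waitUntilFinish(Duration.standardMinutes(5));
if (state == null) {
  // The pipeline is still running; stop it explicitly.
  result.cancel(); // cancel() is declared to throw IOException
}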
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class MetricsReaderTest, method testTimeIsMinusOneIfTimeMetricIsTooFarFromNow.
@Test
public void testTimeIsMinusOneIfTimeMetricIsTooFarFromNow() {
  List<Integer> sampleInputData = Arrays.asList(1, 5, 5, 5, 5);
  createTestPipeline(sampleInputData, new MonitorWithTimeDistribution());
  PipelineResult result = testPipeline.run();

  // 900000000001L stands in for "now"; the collected values are nowhere near it,
  // so the reader rejects the time metric and reports -1.
  MetricsReader reader = new MetricsReader(result, NAMESPACE, 900000000001L);
  assertEquals(-1, reader.getStartTimeMetric("timeDist"));
  assertEquals(-1, reader.getEndTimeMetric("timeDist"));
}
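MonitorWithTimeDistribution is a fixture defined elsewhere in MetricsReaderTest. A plausible reconstruction, assuming it records each element into a distribution named "timeDist" so that the elements act as millisecond timestamps; the actual fixture may differ:

static class MonitorWithTimeDistribution extends DoFn<Integer, Integer> {
  // Hypothetical reconstruction: treat each element as a millisecond timestamp
  // and record it, so MetricsReader can read the distribution's min and max.
  private final Distribution timeDist = Metrics.distribution(NAMESPACE, "timeDist");

  @ProcessElement
  public void processElement(ProcessContext c) {
    timeDist.update(c.element());
    c.output(c.element());
  }
}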
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class MetricsReaderTest, method testEndTimeIsTheMaximumOfAllCollectedDistributions.
@Test
public void testEndTimeIsTheMaximumOfAllCollectedDistributions() {
  List<Integer> sampleInputData = Arrays.asList(1, 2, 3, 4, 5);
  createTestPipelineWithBranches(sampleInputData);
  PipelineResult result = testPipeline.run();

  MetricsReader reader = new MetricsReader(result, NAMESPACE, 0);
  assertEquals(10, reader.getEndTimeMetric("timeDist"));
}
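createTestPipelineWithBranches is likewise a private helper. A sketch of a pipeline shape that would yield the asserted value of 10, assuming two monitored branches where one doubles the input (making the maximum across all "timeDist" distributions 2 * 5 = 10); the transform names and the doubling step are guesses:

private void createTestPipelineWithBranches(List<Integer> sampleInputData) {
  // Hypothetical sketch: the same input feeds two monitored branches, and
  // getEndTimeMetric returns the maximum over every matching distribution.
  PCollection<Integer> input = testPipeline.apply(Create.of(sampleInputData));
  input.apply("Monitor branch 1", ParDo.of(new MonitorWithTimeDistribution()));
  input
      .apply(
          "Double values",
          MapElements.into(TypeDescriptors.integers()).via((Integer x) -> x * 2))
      .apply("Monitor branch 2", ParDo.of(new MonitorWithTimeDistribution()));
}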