Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class TextIOIT, method writeThenReadAll.
@Test
public void writeThenReadAll() {
  TextIO.TypedWrite<String, Object> write =
      TextIO.write()
          .to(filenamePrefix)
          .withOutputFilenames()
          .withCompression(compressionType);
  if (numShards != null) {
    write = write.withNumShards(numShards);
  }

  PCollection<String> testFilenames =
      pipeline
          .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
          .apply(
              "Produce text lines",
              ParDo.of(new FileBasedIOITHelper.DeterministicallyConstructTestTextLineFn()))
          .apply(
              "Collect write start time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "startTime")))
          .apply("Write content to files", write)
          .getPerDestinationOutputFilenames()
          .apply(Values.create())
          .apply(
              "Collect write end time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "middleTime")));

  PCollection<String> consolidatedHashcode =
      testFilenames
          .apply("Match all files", FileIO.matchAll())
          .apply(
              "Read matches",
              FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT))
          .apply("Read files", TextIO.readFiles())
          .apply(
              "Collect read end time",
              ParDo.of(new TimeMonitor<>(FILEIOIT_NAMESPACE, "endTime")))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  testFilenames.apply(
      "Delete test files",
      ParDo.of(new DeleteFileFn())
          .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result);
}
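collectAndPublishMetrics is a private helper of TextIOIT that is not shown above. A minimal sketch of what such a helper could look like, assuming the MetricsReader utility from beam-sdks-java-test-utils and the metric names recorded by the TimeMonitor steps; the duration arithmetic and variable names are illustrative, not the actual implementation:

private void collectAndPublishMetrics(PipelineResult result) {
  // Hypothetical sketch, not the real TextIOIT helper. TimeMonitor records
  // System.currentTimeMillis() per element into a distribution, so the
  // distribution's min/max approximate when a phase started and ended.
  MetricsReader reader = new MetricsReader(result, FILEIOIT_NAMESPACE);
  long writeStart = reader.getStartTimeMetric("startTime");
  long writeEnd = reader.getEndTimeMetric("middleTime");
  long readEnd = reader.getEndTimeMetric("endTime");
  double writeTimeSec = (writeEnd - writeStart) / 1000.0; // write phase duration
  double totalTimeSec = (readEnd - writeStart) / 1000.0; // write + read duration
  // Publishing the values (e.g. to a metrics database) is omitted in this sketch.
}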
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class JdbcIOAutoPartitioningIT, method prepareDatabase.
@Before
public void prepareDatabase() throws SQLException {
  pipelineRead.getOptions().setStableUniqueNames(CheckEnabled.OFF);
  DataSource dbDs = DatabaseTestHelper.getDataSourceForContainer(getDb(dbms));
  try {
    DatabaseTestHelper.createTable(
        dbDs,
        TABLE_NAME,
        Lists.newArrayList(
            KV.of("id", "INTEGER"),
            KV.of("name", "VARCHAR(50)"),
            KV.of("specialDate", "TIMESTAMP")));
  } catch (SQLException e) {
    LOG.info(
        "Exception occurred when preparing database {}. "
            + "This is expected, and the test should pass.",
        dbms,
        e);
    return;
  }

  final String dbmsLocal = dbms;
  pipelineWrite
      .apply(GenerateSequence.from(0).to(NUM_ROWS))
      .apply(MapElements.via(new MapRowDataFn()))
      .apply(
          JdbcIO.<RowData>write()
              .withTable(TABLE_NAME)
              .withDataSourceProviderFn(
                  voide -> DatabaseTestHelper.getDataSourceForContainer(getDb(dbmsLocal))));

  PipelineResult res = pipelineWrite.run();
  res.metrics()
      .allMetrics()
      .getDistributions()
      .forEach(
          dist -> {
            if (dist.getName().getName().contains("intsDistribution")) {
              LOG.info(
                  "Metric: {} | Min: {} | Max: {}",
                  dist.getName().getName(),
                  dist.getCommitted().getMin(),
                  dist.getCommitted().getMax());
            } else if (dist.getName().getName().contains("datetimesDistribution")) {
              // Datetime distributions store epoch millis; convert back for logging.
              LOG.info(
                  "Metric: {} | Min: {} | Max: {}",
                  dist.getName().getName(),
                  new DateTime(Instant.EPOCH.plus(Duration.millis(dist.getCommitted().getMin()))),
                  new DateTime(Instant.EPOCH.plus(Duration.millis(dist.getCommitted().getMax()))));
            }
          });
  res.waitUntilFinish();
}
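Scanning allMetrics() works, but a PipelineResult can also be queried for a single metric. A short sketch using the standard MetricsFilter/MetricNameFilter API; the namespace string is a placeholder, since the snippet above does not show where the distributions are registered:

MetricQueryResults matched =
    res.metrics()
        .queryMetrics(
            MetricsFilter.builder()
                // The namespace here is a placeholder assumption.
                .addNameFilter(MetricNameFilter.named("someNamespace", "intsDistribution"))
                .build());
for (MetricResult<DistributionResult> dist : matched.getDistributions()) {
  LOG.info(
      "Metric: {} | Min: {} | Max: {}",
      dist.getName().getName(),
      dist.getCommitted().getMin(),
      dist.getCommitted().getMax());
}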
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class InfluxDbIOTest, method validateReadTest.
@Test
public void validateReadTest() {
  String influxHost = "http://localhost";
  String userName = "admin";
  String password = "admin";
  String influxDatabaseName = "testDataBase";
  InfluxDB influxDb = Mockito.mock(InfluxDB.class);
  PowerMockito.when(
          InfluxDBFactory.connect(
              anyString(), anyString(), anyString(), any(OkHttpClient.Builder.class)))
      .thenReturn(influxDb);
  PowerMockito.when(InfluxDBFactory.connect(anyString(), anyString(), anyString()))
      .thenReturn(influxDb);
  doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW DATABASES"));
  doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW SHARDS"));
  doReturn(mockResultForNumberAndSizeOfBlocks())
      .when(influxDb)
      .query(new Query("EXPLAIN SELECT * FROM cpu", influxDatabaseName));
  doReturn(mockResult("cpu", 20))
      .when(influxDb)
      .query(new Query("SELECT * FROM cpu", influxDatabaseName));

  PCollection<Long> data =
      pipeline
          .apply(
              "Read data from InfluxDB",
              InfluxDbIO.read()
                  .withDataSourceConfiguration(
                      DataSourceConfiguration.create(
                          StaticValueProvider.of(influxHost),
                          StaticValueProvider.of(userName),
                          StaticValueProvider.of(password)))
                  .withDatabase(influxDatabaseName)
                  .withQuery("SELECT * FROM cpu"))
          .apply(Count.globally());

  PAssert.that(data).containsInAnyOrder(20L);
  PipelineResult result = pipeline.run();
  Assert.assertEquals(State.DONE, result.waitUntilFinish());
}
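waitUntilFinish() blocks until the pipeline reaches a terminal state and returns it, which is what lets the assertion above compare directly against State.DONE. A bounded variant also exists; a small sketch, assuming joda-time's Duration (the five-minute timeout is arbitrary):

// Bounded wait: returns the terminal state, or null if the timeout elapses first.
PipelineResult.State state = result.waitUntilFinish(Duration.standardMinutes(5));
if (state == null) {
  // The pipeline is still running; stop it explicitly.
  result.cancel(); // cancel() is declared to throw IOException
}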
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class MetricsReaderTest, method testTimeIsMinusOneIfTimeMetricIsTooFarFromNow.
@Test
public void testTimeIsMinusOneIfTimeMetricIsTooFarFromNow() {
  List<Integer> sampleInputData = Arrays.asList(1, 5, 5, 5, 5);
  createTestPipeline(sampleInputData, new MonitorWithTimeDistribution());
  PipelineResult result = testPipeline.run();

  // 900000000001L stands in for "now"; the collected values are nowhere near it,
  // so the reader rejects the time metric and reports -1.
  MetricsReader reader = new MetricsReader(result, NAMESPACE, 900000000001L);
  assertEquals(-1, reader.getStartTimeMetric("timeDist"));
  assertEquals(-1, reader.getEndTimeMetric("timeDist"));
}
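MonitorWithTimeDistribution is a fixture defined elsewhere in MetricsReaderTest. A plausible reconstruction, assuming it records each element into a distribution named "timeDist" so that the elements act as millisecond timestamps; the actual fixture may differ:

static class MonitorWithTimeDistribution extends DoFn<Integer, Integer> {
  // Hypothetical reconstruction: treat each element as a millisecond timestamp
  // and record it, so MetricsReader can read the distribution's min and max.
  private final Distribution timeDist = Metrics.distribution(NAMESPACE, "timeDist");

  @ProcessElement
  public void processElement(ProcessContext c) {
    timeDist.update(c.element());
    c.output(c.element());
  }
}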
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
In the class MetricsReaderTest, method testEndTimeIsTheMaximumOfAllCollectedDistributions.
@Test
public void testEndTimeIsTheMaximumOfAllCollectedDistributions() {
  List<Integer> sampleInputData = Arrays.asList(1, 2, 3, 4, 5);
  createTestPipelineWithBranches(sampleInputData);
  PipelineResult result = testPipeline.run();

  MetricsReader reader = new MetricsReader(result, NAMESPACE, 0);
  assertEquals(10, reader.getEndTimeMetric("timeDist"));
}
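createTestPipelineWithBranches is likewise a private helper. A sketch of a pipeline shape that would yield the asserted value of 10, assuming two monitored branches where one doubles the input (making the maximum across all "timeDist" distributions 2 * 5 = 10); the transform names and the doubling step are guesses:

private void createTestPipelineWithBranches(List<Integer> sampleInputData) {
  // Hypothetical sketch: the same input feeds two monitored branches, and
  // getEndTimeMetric returns the maximum over every matching distribution.
  PCollection<Integer> input = testPipeline.apply(Create.of(sampleInputData));
  input.apply("Monitor branch 1", ParDo.of(new MonitorWithTimeDistribution()));
  input
      .apply(
          "Double values",
          MapElements.into(TypeDescriptors.integers()).via((Integer x) -> x * 2))
      .apply("Monitor branch 2", ParDo.of(new MonitorWithTimeDistribution()));
}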