Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
The class BeamEnumerableConverter, method count:
private static Enumerable<Object> count(PipelineOptions options, BeamRelNode node) {
  Pipeline pipeline = Pipeline.create(options);
  BeamSqlRelUtils.toPCollection(pipeline, node).apply(ParDo.of(new RowCounter()));
  PipelineResult result = pipeline.run();
  long count = 0;
  if (!containsUnboundedPCollection(pipeline)) {
    if (PipelineResult.State.FAILED.equals(result.waitUntilFinish())) {
      throw new RuntimeException("Pipeline failed for unknown reason");
    }
    MetricQueryResults metrics =
        result
            .metrics()
            .queryMetrics(
                MetricsFilter.builder()
                    .addNameFilter(MetricNameFilter.named(BeamEnumerableConverter.class, "rows"))
                    .build());
    Iterator<MetricResult<Long>> iterator = metrics.getCounters().iterator();
    if (iterator.hasNext()) {
      count = iterator.next().getAttempted();
    }
  }
  return Linq4j.singletonEnumerable(count);
}
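The "rows" counter queried above is populated by the RowCounter DoFn applied to the pipeline, whose body is not shown. A minimal sketch of what such a DoFn looks like, assuming it does nothing more than increment a counter in the BeamEnumerableConverter namespace for every element:

import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.Row;

// Increments the "rows" counter that the MetricsFilter above queries by name.
private static class RowCounter extends DoFn<Row, Void> {
  private final Counter rows = Metrics.counter(BeamEnumerableConverter.class, "rows");

  @ProcessElement
  public void processElement(ProcessContext context) {
    rows.inc();
  }
}

Because the counter is read back via getAttempted(), the count reflects attempted (not necessarily committed) work on runners that distinguish the two.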
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
The class SpannerWriteIT, method testSequentialWrite:
@Test
public void testSequentialWrite() throws Exception {
  int numRecords = 100;
  SpannerWriteResult stepOne =
      p.apply("first step", GenerateSequence.from(0).to(numRecords))
          .apply(ParDo.of(new GenerateMutations(options.getTable())))
          .apply(SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));
  p.apply("second step", GenerateSequence.from(numRecords).to(2 * numRecords))
      .apply("Gen mutations", ParDo.of(new GenerateMutations(options.getTable())))
      .apply(Wait.on(stepOne.getOutput()))
      .apply("write to table2", SpannerIO.write()
          .withProjectId(project)
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(databaseName));
  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo(2L * numRecords));
}
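The GenerateMutations DoFn used in both steps is not shown above. A minimal sketch under assumed column names ("Key" and "Value" are illustrative, not the real test's schema):

import com.google.cloud.spanner.Mutation;
import org.apache.beam.sdk.transforms.DoFn;

// Turns each sequence index into one Spanner insert-or-update mutation.
private static class GenerateMutations extends DoFn<Long, Mutation> {
  private final String table;

  GenerateMutations(String table) {
    this.table = table;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    long index = c.element();
    c.output(
        Mutation.newInsertOrUpdateBuilder(table)
            .set("Key").to(index)          // assumed key column
            .set("Value").to("v" + index)  // assumed value column
            .build());
  }
}

The sequencing itself comes from Wait.on(stepOne.getOutput()), which holds back the second write until the first SpannerWriteResult signals completion.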
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
The class ReadWriteIT, method testReadWrite:
@Test
public void testReadWrite() throws Exception {
  pipeline.getOptions().as(StreamingOptions.class).setStreaming(true);
  pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);
  TopicPath topic = createTopic(getProject(pipeline.getOptions()));
  SubscriptionPath subscription = null;
  Exception lastException = null;
  for (int i = 0; i < 30; ++i) {
    // Sleep to let topic creation propagate.
    Thread.sleep(1000);
    try {
      subscription = createSubscription(topic);
      break;
    } catch (Exception e) {
      lastException = e;
      LOG.info("Retrying exception on subscription creation.", e);
    }
  }
  if (subscription == null) {
    throw lastException;
  }
  // Publish some messages.
  writeMessages(topic, pipeline);
  // Read some messages. They should be deduplicated by the time we see them, so there should be
  // exactly MESSAGE_COUNT messages, one for every index in [0, MESSAGE_COUNT).
  PCollection<SequencedMessage> messages = readMessages(subscription, pipeline);
  PCollection<Integer> ids = messages.apply(MapElements.via(extractIds()));
  ids.apply("PubsubSignalTest", signal.signalSuccessWhen(BigEndianIntegerCoder.of(), testIds()));
  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();
  LOG.info("Running!");
  signal.waitForSuccess(Duration.standardMinutes(5));
  // A runner may not support cancel.
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
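The extractIds() helper referenced above is not shown. One possible shape, assuming each message payload carries a big-endian int id to match the BigEndianIntegerCoder passed to signalSuccessWhen (the real test may decode the payload differently):

import java.nio.ByteBuffer;
import com.google.cloud.pubsublite.proto.SequencedMessage;
import org.apache.beam.sdk.transforms.SimpleFunction;

// Decodes an integer id from each message body; big-endian layout is an
// assumption chosen to line up with BigEndianIntegerCoder above.
private static SimpleFunction<SequencedMessage, Integer> extractIds() {
  return new SimpleFunction<SequencedMessage, Integer>() {
    @Override
    public Integer apply(SequencedMessage message) {
      return ByteBuffer.wrap(message.getMessage().getData().toByteArray()).getInt();
    }
  };
}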
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
The class InfluxDbIOTest, method validateWriteTest:
@Test
public void validateWriteTest() {
  InfluxDB influxDb = Mockito.mock(InfluxDB.class);
  PowerMockito.when(
          InfluxDBFactory.connect(
              anyString(), anyString(), anyString(), any(OkHttpClient.Builder.class)))
      .thenReturn(influxDb);
  PowerMockito.when(InfluxDBFactory.connect(anyString(), anyString(), anyString()))
      .thenReturn(influxDb);
  String influxHost = "http://localhost";
  String userName = "admin";
  String password = "admin";
  String influxDatabaseName = "testDataBase";
  AtomicInteger countInvocation = new AtomicInteger();
  Mockito.doAnswer(invocation -> countInvocation.getAndIncrement())
      .when(influxDb)
      .write(any(List.class));
  doReturn(getDatabase(influxDatabaseName)).when(influxDb).query(new Query("SHOW DATABASES"));
  final int numOfElementsToWrite = 1000;
  pipeline
      .apply("Generate data", Create.of(GenerateData.getMetric("test_m", numOfElementsToWrite)))
      .apply(
          "Write data to InfluxDB",
          InfluxDbIO.write()
              .withDataSourceConfiguration(
                  DataSourceConfiguration.create(
                      StaticValueProvider.of(influxHost),
                      StaticValueProvider.of(userName),
                      StaticValueProvider.of(password)))
              .withDatabase(influxDatabaseName));
  PipelineResult result = pipeline.run();
  Assert.assertEquals(State.DONE, result.waitUntilFinish());
  Assert.assertTrue(countInvocation.get() > 0);
}
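The static InfluxDBFactory.connect stubs above only take effect when the test class is configured for PowerMock static mocking. A standard setup, sketched under the assumption that the class runs on JUnit 4:

import org.junit.Rule;
import org.junit.runner.RunWith;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
import org.apache.beam.sdk.testing.TestPipeline;

// PowerMock can only intercept static calls on classes it has prepared.
@RunWith(PowerMockRunner.class)
@PrepareForTest(InfluxDBFactory.class)
public class InfluxDbIOTest {
  @Rule public final transient TestPipeline pipeline = TestPipeline.create();
  // ... validateWriteTest() and other tests ...
}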
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
The class HadoopFormatIOIT, method writeAndReadUsingHadoopFormat:
@Test
public void writeAndReadUsingHadoopFormat() {
  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRows))
      .apply("Produce db rows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("Prevent fusion before writing", Reshuffle.viaRandomKey())
      .apply("Collect write time", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply("Construct rows for DBOutputFormat", ParDo.of(new ConstructDBOutputFormatRowFn()))
      .apply(
          "Write using Hadoop OutputFormat",
          HadoopFormatIO.<TestRowDBWritable, NullWritable>write()
              .withConfiguration(hadoopConfiguration.get())
              .withPartitioning()
              .withExternalSynchronization(
                  new HDFSSynchronization(tmpFolder.getRoot().getAbsolutePath())));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();
  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read using Hadoop InputFormat",
              HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                  .withConfiguration(hadoopConfiguration.get()))
          .apply("Collect read time", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Get values only", Values.create())
          .apply("Values as string", ParDo.of(new TestRow.SelectNameFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(getExpectedHashForRowCount(numberOfRows));
  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  if (!options.isWithTestcontainers()) {
    collectAndPublishMetrics(writeResult, readResult);
  }
}
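ConstructDBOutputFormatRowFn bridges TestRow elements into the KV<TestRowDBWritable, NullWritable> pairs that HadoopFormatIO.write() expects. A hypothetical sketch, assuming TestRowDBWritable offers an (id, name) constructor:

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;
import org.apache.hadoop.io.NullWritable;

// Wraps each TestRow in the DBWritable key type; the value slot is unused,
// so NullWritable fills it.
private static class ConstructDBOutputFormatRowFn
    extends DoFn<TestRow, KV<TestRowDBWritable, NullWritable>> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    TestRow row = c.element();
    c.output(KV.of(new TestRowDBWritable(row.id(), row.name()), NullWritable.get()));
  }
}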