Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the apache/beam project.
Class BigQueryIOIT, method testWrite.
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);
  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply(
          "Write to BQ",
          writeIO
              .to(tableQualifier)
              .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
              .withMethod(method)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          Collections.singletonList(
                              new TableFieldSchema().setName("data").setType("BYTES")))));
  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  extractAndPublishTime(pipelineResult, metricName);
}
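TimeMonitor is a pass-through DoFn that stamps each element's transit time into a distribution metric, so the minimum and maximum of that distribution later give the start and end of the stage. A minimal sketch of the idea (the actual testutils class may differ in detail):

import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.transforms.DoFn;

// Pass-through DoFn in the spirit of TimeMonitor: records the wall-clock
// time at which each element flows past into a distribution metric, then
// emits the element unchanged.
public class TimeMonitorSketch<T> extends DoFn<T, T> {
  private final String namespace;
  private final String name;

  public TimeMonitorSketch(String namespace, String name) {
    this.namespace = namespace;
    this.name = name;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Metrics.distribution(namespace, name).update(System.currentTimeMillis());
    c.output(c.element());
  }
}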
Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the apache/beam project.
Class BigQueryIOIT, method testRead.
private void testRead() {
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("Read from BQ", BigQueryIO.readTableRows().from(tableQualifier))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)));
  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  extractAndPublishTime(result, READ_TIME_METRIC_NAME);
}
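extractAndPublishTime is defined elsewhere in BigQueryIOIT. A plausible sketch using Beam's MetricsReader test utility (accessor names assumed), with publishMetric standing in as a hypothetical reporting helper for whatever sink the real suite uses:

// Hypothetical sketch: read the distribution recorded by TimeMonitor and
// publish the elapsed time. The start/end accessors correspond to the
// distribution's min and max.
private void extractAndPublishTime(PipelineResult result, String metricName) {
  MetricsReader reader = new MetricsReader(result, NAMESPACE);
  long startMillis = reader.getStartTimeMetric(metricName);
  long endMillis = reader.getEndTimeMetric(metricName);
  publishMetric(metricName, (endMillis - startMillis) / 1000.0); // hypothetical helper
}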
Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the apache/beam project.
Class HadoopFormatIOIT, method writeAndReadUsingHadoopFormat.
@Test
public void writeAndReadUsingHadoopFormat() {
  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRows))
      .apply("Produce db rows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("Prevent fusion before writing", Reshuffle.viaRandomKey())
      .apply("Collect write time", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply("Construct rows for DBOutputFormat", ParDo.of(new ConstructDBOutputFormatRowFn()))
      .apply(
          "Write using Hadoop OutputFormat",
          HadoopFormatIO.<TestRowDBWritable, NullWritable>write()
              .withConfiguration(hadoopConfiguration.get())
              .withPartitioning()
              .withExternalSynchronization(
                  new HDFSSynchronization(tmpFolder.getRoot().getAbsolutePath())));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();
  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read using Hadoop InputFormat",
              HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                  .withConfiguration(hadoopConfiguration.get()))
          .apply("Collect read time", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Get values only", Values.create())
          .apply("Values as string", ParDo.of(new TestRow.SelectNameFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(getExpectedHashForRowCount(numberOfRows));
  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  if (!options.isWithTestcontainers()) {
    collectAndPublishMetrics(writeResult, readResult);
  }
}
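The hadoopConfiguration supplier is prepared in the test's setup. A sketch of the kind of Configuration HadoopFormatIO expects when writing through a JDBC-backed DBOutputFormat; the driver, URL, credentials, table name, and field count here are placeholders, and the exact set of required properties is an assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

// Hypothetical Configuration builder for writing through DBOutputFormat.
private static Configuration buildHadoopConfiguration() {
  Configuration conf = new Configuration();
  // Placeholder JDBC connection settings.
  DBConfiguration.configureDB(
      conf, "org.postgresql.Driver", "jdbc:postgresql://localhost:5432/beam_test", "user", "pass");
  conf.set("mapreduce.job.outputformat.class", DBOutputFormat.class.getName());
  conf.set("mapreduce.job.output.key.class", TestRowDBWritable.class.getName());
  conf.set("mapreduce.job.output.value.class", NullWritable.class.getName());
  conf.set("mapreduce.jdbc.output.table.name", "test_table");
  conf.set("mapreduce.jdbc.output.field.count", "2");
  return conf;
}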
Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the apache/beam project.
Class BigQueryIOPushDownIT, method readUsingDirectReadMethodPushDown.
@Test
public void readUsingDirectReadMethodPushDown() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
      .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread_pushdown");
}
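Whether push-down actually fired can be checked by inspecting the optimized plan before running it. A sketch using Calcite's RelOptUtil (Beam vendors Calcite, so the real import path may differ) and the BeamPushDownIOSourceRel node name as the expected marker:

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import org.apache.calcite.plan.RelOptUtil;

// Dump the optimized relational plan; with push-down enabled the BigQuery
// scan should surface as a push-down source node rather than a plain scan.
String plan = RelOptUtil.toString(beamRelNode);
assertThat(plan, containsString("BeamPushDownIOSourceRel"));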
Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the apache/beam project.
Class BigQueryIOPushDownIT, method readUsingDirectReadMethod.
@Test
public void readUsingDirectReadMethod() {
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove push-down rule
  ruleList.remove(BeamIOPushDownRule.INSTANCE);
  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(ImmutableList.of(RuleSets.ofList(ruleList)))
          .build();
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));
  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
      .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));
  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
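The copy-then-remove loop at the top of this test can also be written as a single stream pipeline. A sketch, assuming getRuleSets() returns an Iterable of Calcite RuleSet (each RuleSet is itself an Iterable of RelOptRule):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

// Flatten every rule set and drop the push-down rule in one pass.
List<RelOptRule> ruleList =
    StreamSupport.stream(getRuleSets().spliterator(), false)
        .flatMap(rs -> StreamSupport.stream(rs.spliterator(), false))
        .filter(rule -> rule != BeamIOPushDownRule.INSTANCE)
        .collect(Collectors.toList());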