Search in sources:

Example 6 with TimeMonitor

Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the Apache Beam project.

Class BigQueryIOIT, method testWrite.

/**
 * Runs a write pipeline against BigQuery and hands the finished result to
 * {@code extractAndPublishTime}.
 *
 * <p>The pipeline reads from a {@link SyntheticBoundedSource}, passes every element through a
 * {@link TimeMonitor} registered under {@code metricName}, applies {@code MapKVToV} and finally
 * writes with the supplied transform. The write method is resolved from
 * {@code options.getWriteMethod()} before any transform is applied.
 *
 * @param writeIO base BigQuery write transform; destination table, custom GCS temp location,
 *     write method and schema are configured on it here
 * @param metricName metric name given to the {@link TimeMonitor} and to
 *     {@code extractAndPublishTime}
 */
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
    Pipeline writePipeline = Pipeline.create(options);
    BigQueryIO.Write.Method writeMethod =
        BigQueryIO.Write.Method.valueOf(options.getWriteMethod());

    // The target table has a single column named "data" of type BYTES.
    TableFieldSchema dataField = new TableFieldSchema().setName("data").setType("BYTES");
    TableSchema tableSchema = new TableSchema().setFields(Collections.singletonList(dataField));

    writePipeline
        .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
        .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
        .apply("Map records", ParDo.of(new MapKVToV()))
        .apply(
            "Write to BQ",
            writeIO
                .to(tableQualifier)
                .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
                .withMethod(writeMethod)
                .withSchema(tableSchema));

    PipelineResult outcome = writePipeline.run();
    outcome.waitUntilFinish();
    extractAndPublishTime(outcome, metricName);
}
Also used : SyntheticBoundedSource(org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) TableSchema(com.google.api.services.bigquery.model.TableSchema) PipelineResult(org.apache.beam.sdk.PipelineResult) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Pipeline(org.apache.beam.sdk.Pipeline)

Example 7 with TimeMonitor

Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the Apache Beam project.

Class BigQueryIOIT, method testRead.

/**
 * Runs a read pipeline against BigQuery and hands the finished result to
 * {@code extractAndPublishTime}.
 *
 * <p>Table rows are read from {@code tableQualifier} and passed through a {@link TimeMonitor}
 * registered under {@code READ_TIME_METRIC_NAME}.
 */
private void testRead() {
    Pipeline readPipeline = Pipeline.create(options);

    readPipeline
        .apply("Read from BQ", BigQueryIO.readTableRows().from(tableQualifier))
        .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)));

    PipelineResult outcome = readPipeline.run();
    outcome.waitUntilFinish();
    extractAndPublishTime(outcome, READ_TIME_METRIC_NAME);
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 8 with TimeMonitor

Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the Apache Beam project.

Class HadoopFormatIOIT, method writeAndReadUsingHadoopFormat.

/**
 * Writes {@code numberOfRows} generated rows through {@code HadoopFormatIO}, reads them back and
 * asserts on a combined hash of the rows that were read.
 *
 * <p>The write pipeline is run to completion before the read pipeline is built. Both pipelines
 * contain a {@link TimeMonitor} step ("write_time" and "read_time"). The two results are passed
 * to {@code collectAndPublishMetrics} only when {@code options.isWithTestcontainers()} is false.
 */
@Test
public void writeAndReadUsingHadoopFormat() {
    // Write phase.
    writePipeline
        .apply("Generate sequence", GenerateSequence.from(0).to(numberOfRows))
        .apply("Produce db rows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
        .apply("Prevent fusion before writing", Reshuffle.viaRandomKey())
        .apply("Collect write time", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
        .apply("Construct rows for DBOutputFormat", ParDo.of(new ConstructDBOutputFormatRowFn()))
        .apply(
            "Write using Hadoop OutputFormat",
            HadoopFormatIO.<TestRowDBWritable, NullWritable>write()
                .withConfiguration(hadoopConfiguration.get())
                .withPartitioning()
                .withExternalSynchronization(
                    new HDFSSynchronization(tmpFolder.getRoot().getAbsolutePath())));
    PipelineResult writeOutcome = writePipeline.run();
    writeOutcome.waitUntilFinish();

    // Read phase: reduce everything that was read to a single hash value.
    PCollection<String> combinedHash =
        readPipeline
            .apply(
                "Read using Hadoop InputFormat",
                HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                    .withConfiguration(hadoopConfiguration.get()))
            .apply("Collect read time", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
            .apply("Get values only", Values.create())
            .apply("Values as string", ParDo.of(new TestRow.SelectNameFn()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()));
    PAssert.thatSingleton(combinedHash).isEqualTo(getExpectedHashForRowCount(numberOfRows));
    PipelineResult readOutcome = readPipeline.run();
    readOutcome.waitUntilFinish();

    // Metrics are skipped when the test runs with Testcontainers.
    if (options.isWithTestcontainers()) {
        return;
    }
    collectAndPublishMetrics(writeOutcome, readOutcome);
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) TestRow(org.apache.beam.sdk.io.common.TestRow) PipelineResult(org.apache.beam.sdk.PipelineResult) NullWritable(org.apache.hadoop.io.NullWritable) HashingFn(org.apache.beam.sdk.io.common.HashingFn) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)

Example 9 with TimeMonitor

Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the Apache Beam project.

Class BigQueryIOPushDownIT, method readUsingDirectReadMethodPushDown.

/**
 * Creates the test table with the DIRECT_READ method, runs {@code SELECT_STATEMENT} through the
 * existing {@code sqlEnv} and times the resulting collection with a {@link TimeMonitor}.
 *
 * <p>The finished result is passed to {@code collectAndPublishMetrics} with the suffix
 * {@code "_directread_pushdown"}.
 */
@Test
public void readUsingDirectReadMethodPushDown() {
    String createTableDdl = String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString());
    sqlEnv.executeDdl(createTableDdl);

    BeamRelNode queryPlan = sqlEnv.parseQuery(SELECT_STATEMENT);
    BeamSqlRelUtils.toPCollection(pipeline, queryPlan)
        .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

    PipelineResult outcome = pipeline.run();
    outcome.waitUntilFinish();
    collectAndPublishMetrics(outcome, "_directread_pushdown");
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 10 with TimeMonitor

Use of org.apache.beam.sdk.testutils.metrics.TimeMonitor in the Apache Beam project.

Class BigQueryIOPushDownIT, method readUsingDirectReadMethod.

/**
 * Same read as the push-down variant, but with {@code BeamIOPushDownRule.INSTANCE} removed from
 * the planner rules.
 *
 * <p>All rules from {@code getRuleSets()} are flattened into one list, the push-down rule is
 * removed, and {@code sqlEnv} is rebuilt on a fresh {@link InMemoryMetaStore} with that list.
 * The query result is timed with a {@link TimeMonitor} and the finished result is passed to
 * {@code collectAndPublishMetrics} with the suffix {@code "_directread"}.
 */
@Test
public void readUsingDirectReadMethod() {
    // Flatten every rule set into a single mutable list.
    List<RelOptRule> plannerRules = new ArrayList<>();
    for (RuleSet ruleSet : getRuleSets()) {
        for (RelOptRule rule : ruleSet) {
            plannerRules.add(rule);
        }
    }
    // Drop the push-down rule
    plannerRules.remove(BeamIOPushDownRule.INSTANCE);

    BigQueryPerfTableProvider tableProvider =
        new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC);
    InMemoryMetaStore metaStore = new InMemoryMetaStore();
    metaStore.registerProvider(tableProvider);

    sqlEnv =
        BeamSqlEnv.builder(metaStore)
            .setPipelineOptions(PipelineOptionsFactory.create())
            .setRuleSets(ImmutableList.of(RuleSets.ofList(plannerRules)))
            .build();

    String createTableDdl = String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString());
    sqlEnv.executeDdl(createTableDdl);

    BeamRelNode queryPlan = sqlEnv.parseQuery(SELECT_STATEMENT);
    BeamSqlRelUtils.toPCollection(pipeline, queryPlan)
        .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

    PipelineResult outcome = pipeline.run();
    outcome.waitUntilFinish();
    collectAndPublishMetrics(outcome, "_directread");
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) RuleSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSet) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) ArrayList(java.util.ArrayList) PipelineResult(org.apache.beam.sdk.PipelineResult) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Test(org.junit.Test)

Aggregations

TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor) 14, PipelineResult (org.apache.beam.sdk.PipelineResult) 13, Test (org.junit.Test) 11, HashingFn (org.apache.beam.sdk.io.common.HashingFn) 8, FileBasedIOITHelper (org.apache.beam.sdk.io.common.FileBasedIOITHelper) 4, BeamRelNode (org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) 3, SyntheticBoundedSource (org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) 3, NamedTestResult (org.apache.beam.sdk.testutils.NamedTestResult) 3, ArrayList (java.util.ArrayList) 2, Pipeline (org.apache.beam.sdk.Pipeline) 2, DeleteFileFn (org.apache.beam.sdk.io.common.FileBasedIOITHelper.DeleteFileFn) 2, TestRow (org.apache.beam.sdk.io.common.TestRow) 2, TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 1, TableSchema (com.google.api.services.bigquery.model.TableSchema) 1, Timestamp (com.google.cloud.Timestamp) 1, HashSet (java.util.HashSet) 1, List (java.util.List) 1, Set (java.util.Set) 1, UUID (java.util.UUID) 1, Function (java.util.function.Function) 1