Search in sources :

Example 1 with SchemaAndRecord

Use of org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord in the Apache Beam project.

From the class BigQueryHllSketchCompatibilityIT, method readSketchFromBigQuery.

/**
 * Runs a pipeline that asks BigQuery to build an HLL sketch over {@code DATA_FIELD_NAME} of the
 * given table, reads that sketch back, and asserts that the count extracted from it equals
 * {@code expectedCount}.
 *
 * <p>The query result is read with {@code Method.DIRECT_READ} in {@code DataFormat.AVRO} format
 * and each record's {@code QUERY_RESULT_FIELD_NAME} value is converted to a {@code byte[]}
 * sketch before being merged and extracted.
 *
 * @param tableId id of the table inside {@code DATASET_ID} to query
 * @param expectedCount value the single extracted count must equal
 */
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
    // Fully qualified "<dataset>.<table>" reference used in the FROM clause.
    String qualifiedTable = String.format("%s.%s", DATASET_ID, tableId);
    String sketchQuery =
        String.format(
            "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
            DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, qualifiedTable);

    // The result field is cast to ByteBuffer and handed to HllCount to obtain the raw sketch.
    SerializableFunction<SchemaAndRecord, byte[]> toSketchBytes =
        row -> {
            ByteBuffer rawSketch = (ByteBuffer) row.getRecord().get(QUERY_RESULT_FIELD_NAME);
            return HllCount.getSketchFromByteBuffer(rawSketch);
        };

    TestPipelineOptions pipelineOptions =
        TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    PCollection<byte[]> sketches =
        pipeline.apply(
            BigQueryIO.read(toSketchBytes)
                .withFormat(DataFormat.AVRO)
                .fromQuery(sketchQuery)
                .usingStandardSql()
                .withMethod(Method.DIRECT_READ)
                .withCoder(ByteArrayCoder.of()));

    // no-op, only for testing MergePartial
    PCollection<byte[]> mergedSketch = sketches.apply(HllCount.MergePartial.globally());
    PCollection<Long> extractedCount = mergedSketch.apply(HllCount.Extract.globally());

    PAssert.thatSingleton(extractedCount).isEqualTo(expectedCount);
    pipeline.run().waitUntilFinish();
}
Also used : Arrays(java.util.Arrays) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) BigqueryMatcher.queryResultHasChecksum(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.queryResultHasChecksum) Date(java.util.Date) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) SchemaAndRecord(org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord) ByteBuffer(java.nio.ByteBuffer) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableRow(com.google.api.services.bigquery.model.TableRow) TableSchema(com.google.api.services.bigquery.model.TableSchema) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) BigqueryClient(org.apache.beam.sdk.io.gcp.testing.BigqueryClient) TableReference(com.google.api.services.bigquery.model.TableReference) AfterClass(org.junit.AfterClass) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) PAssert(org.apache.beam.sdk.testing.PAssert) BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) ApplicationNameOptions(org.apache.beam.sdk.options.ApplicationNameOptions) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) Table(com.google.api.services.bigquery.model.Table) DataFormat(com.google.cloud.bigquery.storage.v1.DataFormat) List(java.util.List) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BigqueryMatcher.createQueryUsingStandardSql(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.createQueryUsingStandardSql) Collections(java.util.Collections) 
TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) SchemaAndRecord(org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

Table (com.google.api.services.bigquery.model.Table)1 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)1 TableReference (com.google.api.services.bigquery.model.TableReference)1 TableRow (com.google.api.services.bigquery.model.TableRow)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 DataFormat (com.google.cloud.bigquery.storage.v1.DataFormat)1 ByteBuffer (java.nio.ByteBuffer)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 Date (java.util.Date)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1 Pipeline (org.apache.beam.sdk.Pipeline)1 ByteArrayCoder (org.apache.beam.sdk.coders.ByteArrayCoder)1 GcpOptions (org.apache.beam.sdk.extensions.gcp.options.GcpOptions)1 BigQueryIO (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO)1 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method)1 SchemaAndRecord (org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord)1 BigqueryClient (org.apache.beam.sdk.io.gcp.testing.BigqueryClient)1