Search in sources :

Example 1 with BigQueryIO

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in project beam by apache.

the class BigQueryIOIT method testAvroWrite.

/**
 * Runs the write test using BigQueryIO's Avro format function.
 *
 * <p>The write is configured with {@code WRITE_TRUNCATE} disposition. Each {@code byte[]}
 * element is wrapped in a {@link ByteBuffer} and stored under the {@code "data"} field of a
 * {@link GenericRecord} created from the schema supplied with the write request. The
 * configured transform is passed to {@code testWrite} together with
 * {@code AVRO_WRITE_TIME_METRIC_NAME}.
 */
private void testAvroWrite() {
    BigQueryIO.Write<byte[]> truncatingWrite = BigQueryIO.<byte[]>write()
            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE);
    BigQueryIO.Write<byte[]> avroWrite = truncatingWrite.withAvroFormatFunction(request -> {
        byte[] payload = request.getElement();
        GenericRecord avroRecord = new GenericData.Record(request.getSchema());
        // Wrap the raw bytes in a ByteBuffer before storing them in the record.
        avroRecord.put("data", ByteBuffer.wrap(payload));
        return avroRecord;
    });
    testWrite(avroWrite, AVRO_WRITE_TIME_METRIC_NAME);
}
Also used : BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 2 with BigQueryIO

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in project beam by apache.

the class BigQueryIOIT method testJsonWrite.

/**
 * Runs the write test using BigQueryIO's {@link TableRow} format function.
 *
 * <p>Each {@code byte[]} element is stored under the {@code "data"} field of a new
 * {@link TableRow}. The configured transform is passed to {@code testWrite} together with
 * {@code WRITE_TIME_METRIC_NAME}.
 */
private void testJsonWrite() {
    BigQueryIO.Write<byte[]> jsonWrite = BigQueryIO.<byte[]>write()
            .withFormatFunction(payload -> new TableRow().set("data", payload));
    testWrite(jsonWrite, WRITE_TIME_METRIC_NAME);
}
Also used : BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) TableRow(com.google.api.services.bigquery.model.TableRow)

Example 3 with BigQueryIO

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO in project beam by apache.

the class BigQueryHllSketchCompatibilityIT method writeSketchToBigQuery.

/**
 * Builds an HLL sketch from {@code testData} in a Beam pipeline, writes it to BigQuery, and
 * checks the count BigQuery extracts from it.
 *
 * <p>The sketch is written to the table {@code DATASET_ID.SKETCH_TABLE_ID} with
 * {@code WRITE_TRUNCATE} disposition. An empty sketch (zero-length byte array) is written as
 * a {@code null} field value. After the pipeline finishes, a
 * {@code SELECT HLL_COUNT.EXTRACT(...)} query is run against that table and its result is
 * matched against {@code expectedChecksum}.
 *
 * @param testData the strings fed into {@code HllCount.Init.forStrings().globally()}
 * @param expectedChecksum the checksum the query result is expected to have
 */
private void writeSketchToBigQuery(List<String> testData, String expectedChecksum) {
    String tableSpec = String.format("%s.%s", DATASET_ID, SKETCH_TABLE_ID);

    // Single-column schema holding the sketch.
    TableFieldSchema sketchField = new TableFieldSchema().setName(SKETCH_FIELD_NAME).setType(SKETCH_FIELD_TYPE);
    TableSchema tableSchema = new TableSchema().setFields(Collections.singletonList(sketchField));

    // Nullness warnings are suppressed until we have a stub class for BigQuery TableRow.
    @SuppressWarnings("nullness") SerializableFunction<byte[], TableRow> toTableRow = sketch -> {
        byte[] cellValue = null;
        if (sketch.length != 0) {
            cellValue = sketch;
        }
        return new TableRow().set(SKETCH_FIELD_NAME, cellValue);
    };

    BigQueryIO.Write<byte[]> sketchWrite = BigQueryIO.<byte[]>write()
            .to(tableSpec)
            .withSchema(tableSchema)
            .withFormatFunction(toTableRow)
            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE);

    TestPipelineOptions pipelineOptions = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
    Pipeline pipeline = Pipeline.create(pipelineOptions);
    pipeline
            .apply(Create.of(testData).withType(TypeDescriptor.of(String.class)))
            .apply(HllCount.Init.forStrings().globally())
            .apply(sketchWrite);
    pipeline.run().waitUntilFinish();

    // BigqueryMatcher sends the query to retrieve the estimated count and verifies its
    // correctness using the checksum.
    String extractQuery = String.format("SELECT HLL_COUNT.EXTRACT(%s) FROM %s", SKETCH_FIELD_NAME, tableSpec);
    assertThat(createQueryUsingStandardSql(APP_NAME, PROJECT_ID, extractQuery), queryResultHasChecksum(expectedChecksum));
}
Also used : Arrays(java.util.Arrays) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) BigqueryMatcher.queryResultHasChecksum(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.queryResultHasChecksum) Date(java.util.Date) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) SchemaAndRecord(org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord) ByteBuffer(java.nio.ByteBuffer) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableRow(com.google.api.services.bigquery.model.TableRow) TableSchema(com.google.api.services.bigquery.model.TableSchema) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) BigqueryClient(org.apache.beam.sdk.io.gcp.testing.BigqueryClient) TableReference(com.google.api.services.bigquery.model.TableReference) AfterClass(org.junit.AfterClass) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) PAssert(org.apache.beam.sdk.testing.PAssert) BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) ApplicationNameOptions(org.apache.beam.sdk.options.ApplicationNameOptions) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) Table(com.google.api.services.bigquery.model.Table) DataFormat(com.google.cloud.bigquery.storage.v1.DataFormat) List(java.util.List) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BigqueryMatcher.createQueryUsingStandardSql(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.createQueryUsingStandardSql) Collections(java.util.Collections) 
TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

BigQueryIO (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO)3 TableRow (com.google.api.services.bigquery.model.TableRow)2 Table (com.google.api.services.bigquery.model.Table)1 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)1 TableReference (com.google.api.services.bigquery.model.TableReference)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 DataFormat (com.google.cloud.bigquery.storage.v1.DataFormat)1 ByteBuffer (java.nio.ByteBuffer)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 Date (java.util.Date)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 Pipeline (org.apache.beam.sdk.Pipeline)1 ByteArrayCoder (org.apache.beam.sdk.coders.ByteArrayCoder)1 GcpOptions (org.apache.beam.sdk.extensions.gcp.options.GcpOptions)1 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method)1 SchemaAndRecord (org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord)1