Search in sources :

Example 16 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryUtilsTest method testToTableSchema_row.

@Test
public void testToTableSchema_row() {
    TableSchema schema = toTableSchema(ROW_TYPE);
    assertThat(schema.getFields().size(), equalTo(1));
    TableFieldSchema field = schema.getFields().get(0);
    assertThat(field.getName(), equalTo("row"));
    assertThat(field.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
    assertThat(field.getMode(), nullValue());
    assertThat(field.getFields(), containsInAnyOrder(ID, VALUE, NAME, TIMESTAMP_VARIANT1, TIMESTAMP_VARIANT2, TIMESTAMP_VARIANT3, TIMESTAMP_VARIANT4, DATETIME, DATETIME_0MS, DATETIME_0S_NS, DATETIME_0S_0NS, DATE, TIME, TIME_0MS, TIME_0S_NS, TIME_0S_0NS, VALID, BINARY, NUMERIC, BOOLEAN, LONG, DOUBLE));
}
Also used : BigQueryUtils.toTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.toTableSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 17 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class NexmarkLauncher method sinkResultsToBigQuery.

/**
 * Send {@code formattedResults} to BigQuery.
 */
private void sinkResultsToBigQuery(PCollection<String> formattedResults, long now, String version) {
    String tableSpec = NexmarkUtils.tableSpec(options, queryName, now, version);
    TableSchema tableSchema = new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("result").setType("STRING"), new TableFieldSchema().setName("records").setMode("REPEATED").setType("RECORD").setFields(ImmutableList.of(new TableFieldSchema().setName("index").setType("INTEGER"), new TableFieldSchema().setName("value").setType("STRING")))));
    NexmarkUtils.console("Writing results to BigQuery table %s", tableSpec);
    BigQueryIO.Write io = BigQueryIO.write().to(tableSpec).withSchema(tableSchema).withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED).withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND);
    formattedResults.apply(queryName + ".StringToTableRow", ParDo.of(new StringToTableRow())).apply(queryName + ".WriteBigQueryResults", io);
}
Also used : BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 18 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryIOWriteTest method testWriteAvroWithCustomWriter.

@Test
public void testWriteAvroWithCustomWriter() throws Exception {
    if (useStorageApi || useStreaming) {
        return;
    }
    SerializableFunction<AvroWriteRequest<InputRecord>, GenericRecord> formatFunction = r -> {
        GenericRecord rec = new GenericData.Record(r.getSchema());
        InputRecord i = r.getElement();
        rec.put("strVal", i.strVal());
        rec.put("longVal", i.longVal());
        rec.put("doubleVal", i.doubleVal());
        rec.put("instantVal", i.instantVal().getMillis() * 1000);
        return rec;
    };
    SerializableFunction<org.apache.avro.Schema, DatumWriter<GenericRecord>> customWriterFactory = s -> new GenericDatumWriter<GenericRecord>() {

        @Override
        protected void writeString(org.apache.avro.Schema schema, Object datum, Encoder out) throws IOException {
            super.writeString(schema, datum.toString() + "_custom", out);
        }
    };
    p.apply(Create.of(InputRecord.create("test", 1, 1.0, Instant.parse("2019-01-01T00:00:00Z")), InputRecord.create("test2", 2, 2.0, Instant.parse("2019-02-01T00:00:00Z"))).withCoder(INPUT_RECORD_CODER)).apply(BigQueryIO.<InputRecord>write().to("dataset-id.table-id").withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED).withSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("strVal").setType("STRING"), new TableFieldSchema().setName("longVal").setType("INTEGER"), new TableFieldSchema().setName("doubleVal").setType("FLOAT"), new TableFieldSchema().setName("instantVal").setType("TIMESTAMP")))).withTestServices(fakeBqServices).withAvroWriter(formatFunction, customWriterFactory).withoutValidation());
    p.run();
    assertThat(fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"), containsInAnyOrder(new TableRow().set("strVal", "test_custom").set("longVal", "1").set("doubleVal", 1.0D).set("instantVal", "2019-01-01 00:00:00 UTC"), new TableRow().set("strVal", "test2_custom").set("longVal", "2").set("doubleVal", 2.0D).set("instantVal", "2019-02-01 00:00:00 UTC")));
}
Also used : ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) Encoder(org.apache.avro.io.Encoder) ResultCoder(org.apache.beam.sdk.io.gcp.bigquery.WritePartition.ResultCoder) Matcher(java.util.regex.Matcher) DoFnTester(org.apache.beam.sdk.transforms.DoFnTester) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) EnumSet(java.util.EnumSet) ValueProvider(org.apache.beam.sdk.options.ValueProvider) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) KvCoder(org.apache.beam.sdk.coders.KvCoder) Matchers.allOf(org.hamcrest.Matchers.allOf) Set(java.util.Set) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Serializable(java.io.Serializable) IncompatibleWindowException(org.apache.beam.sdk.transforms.windowing.IncompatibleWindowException) Assert.assertFalse(org.junit.Assert.assertFalse) AutoValue(com.google.auto.value.AutoValue) TestStream(org.apache.beam.sdk.testing.TestStream) Matchers.is(org.hamcrest.Matchers.is) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem) Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) KV(org.apache.beam.sdk.values.KV) FakeDatasetService(org.apache.beam.sdk.io.gcp.testing.FakeDatasetService) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) View(org.apache.beam.sdk.transforms.View) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Distinct(org.apache.beam.sdk.transforms.Distinct) Multimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap) TupleTag(org.apache.beam.sdk.values.TupleTag) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) StreamSupport(java.util.stream.StreamSupport) JavaFieldSchema(org.apache.beam.sdk.schemas.JavaFieldSchema) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) Result(org.apache.beam.sdk.io.gcp.bigquery.WriteTables.Result) Before(org.junit.Before) TableReference(com.google.api.services.bigquery.model.TableReference) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) NonMergingWindowFn(org.apache.beam.sdk.transforms.windowing.NonMergingWindowFn) Parameter(org.junit.runners.Parameterized.Parameter) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) ShardedKeyCoder(org.apache.beam.sdk.coders.ShardedKeyCoder) Test(org.junit.Test) Schema(org.apache.beam.sdk.schemas.Schema) File(java.io.File) Assert.assertNull(org.junit.Assert.assertNull) Paths(java.nio.file.Paths) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) AtomicCoder(org.apache.beam.sdk.coders.AtomicCoder) DefaultSchema(org.apache.beam.sdk.schemas.annotations.DefaultSchema) FakeJobService(org.apache.beam.sdk.io.gcp.testing.FakeJobService) Assert.assertEquals(org.junit.Assert.assertEquals) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TimePartitioning(com.google.api.services.bigquery.model.TimePartitioning) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) After(org.junit.After) TableRow(com.google.api.services.bigquery.model.TableRow) Assert.fail(org.junit.Assert.fail) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap) ShardedKey(org.apache.beam.sdk.values.ShardedKey) Parameterized(org.junit.runners.Parameterized) MapElements(org.apache.beam.sdk.transforms.MapElements) DatumWriter(org.apache.avro.io.DatumWriter) Collection(java.util.Collection) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) Description(org.junit.runner.Description) Collectors(java.util.stream.Collectors) List(java.util.List) Clustering(com.google.api.services.bigquery.model.Clustering) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) Statement(org.junit.runners.model.Statement) TestRule(org.junit.rules.TestRule) Parameters(org.junit.runners.Parameterized.Parameters) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) SerializableFunctions(org.apache.beam.sdk.transforms.SerializableFunctions) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SchemaUpdateOption(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption) WindowMappingFn(org.apache.beam.sdk.transforms.windowing.WindowMappingFn) SchemaCreate(org.apache.beam.sdk.schemas.annotations.SchemaCreate) Job(com.google.api.services.bigquery.model.Job) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExpectedException(org.junit.rules.ExpectedException) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) OutputStream(java.io.OutputStream) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) GenericRecord(org.apache.avro.generic.GenericRecord) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) Matchers(org.hamcrest.Matchers) PCollection(org.apache.beam.sdk.values.PCollection) Table(com.google.api.services.bigquery.model.Table) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Collections(java.util.Collections) JobConfigurationLoad(com.google.api.services.bigquery.model.JobConfigurationLoad) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) TableSchema(com.google.api.services.bigquery.model.TableSchema) JavaFieldSchema(org.apache.beam.sdk.schemas.JavaFieldSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.beam.sdk.schemas.Schema) DefaultSchema(org.apache.beam.sdk.schemas.annotations.DefaultSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericData(org.apache.avro.generic.GenericData) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) DatumWriter(org.apache.avro.io.DatumWriter) Encoder(org.apache.avro.io.Encoder) TableRow(com.google.api.services.bigquery.model.TableRow) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 19 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryIOWriteTest method testWriteToTableDecorator.

@Test
public void testWriteToTableDecorator() throws Exception {
    TableRow row1 = new TableRow().set("name", "a").set("number", "1");
    TableRow row2 = new TableRow().set("name", "b").set("number", "2");
    // withMethod overrides the pipeline option, so we need to explicitly requiest
    // STORAGE_API_WRITES.
    BigQueryIO.Write.Method method = useStorageApi ? (useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API) : Method.STREAMING_INSERTS;
    TableSchema schema = new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER")));
    p.apply(Create.of(row1, row2)).apply(BigQueryIO.writeTableRows().to("project-id:dataset-id.table-id$20171127").withTestServices(fakeBqServices).withMethod(method).withSchema(schema).withoutValidation());
    p.run();
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 20 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryIOWriteTest method testWrongErrorConfigs.

@Test
public void testWrongErrorConfigs() {
    if (useStorageApi) {
        return;
    }
    p.enableAutoRunIfMissing(true);
    TableRow row1 = new TableRow().set("name", "a").set("number", "1");
    BigQueryIO.Write<TableRow> bqIoWrite = BigQueryIO.writeTableRows().to("project-id:dataset-id.table-id").withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED).withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS).withSchema(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER")))).withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()).withTestServices(fakeBqServices).withoutValidation();
    try {
        p.apply("Create1", Create.<TableRow>of(row1)).apply("Write 1", bqIoWrite).getFailedInsertsWithErr();
        fail();
    } catch (IllegalArgumentException e) {
        assertThat(e.getMessage(), is("Cannot use getFailedInsertsWithErr as this WriteResult " + "does not use extended errors. Use getFailedInserts instead"));
    }
    try {
        p.apply("Create2", Create.<TableRow>of(row1)).apply("Write2", bqIoWrite.withExtendedErrorInfo()).getFailedInserts();
        fail();
    } catch (IllegalArgumentException e) {
        assertThat(e.getMessage(), is("Cannot use getFailedInserts as this WriteResult " + "uses extended errors information. Use getFailedInsertsWithErr instead"));
    }
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Aggregations

TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)80 TableSchema (com.google.api.services.bigquery.model.TableSchema)71 TableRow (com.google.api.services.bigquery.model.TableRow)56 Test (org.junit.Test)45 Table (com.google.api.services.bigquery.model.Table)25 TableReference (com.google.api.services.bigquery.model.TableReference)23 ArrayList (java.util.ArrayList)17 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)16 List (java.util.List)15 Map (java.util.Map)15 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)13 Pipeline (org.apache.beam.sdk.Pipeline)12 ByteString (com.google.protobuf.ByteString)10 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)10 Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write)10 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)10 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)9 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)9 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)8