Search in sources :

Example 6 with Write

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write in project beam by apache.

the class BigQueryIOWriteTest method testWriteValidateFailsBothFormatFunctions.

/**
 * Checks that applying a write configured with both {@code withFormatFunction} and
 * {@code withAvroFormatFunction} is expected to fail with an {@link IllegalArgumentException}
 * carrying the "only one of" message.
 *
 * <p>The whole body is skipped when {@code useStorageApi} is set.
 */
@Test
public void testWriteValidateFailsBothFormatFunctions() {
    if (!useStorageApi) {
        // This test never runs the pipeline; abandoned-node enforcement is turned off.
        p.enableAbandonedNodeEnforcement(false);

        // Register the expected failure before the transform is applied.
        thrown.expect(IllegalArgumentException.class);
        thrown.expectMessage("Only one of withFormatFunction or withAvroFormatFunction/withAvroWriter maybe set, not both.");

        // Both a TableRow format function and an Avro format function are configured on purpose.
        p.apply(Create.empty(INPUT_RECORD_CODER))
            .apply(
                BigQueryIO.<InputRecord>write()
                    .to("dataset.table")
                    .withSchema(new TableSchema())
                    .withFormatFunction(r -> new TableRow())
                    .withAvroFormatFunction(r -> new GenericData.Record(r.getSchema()))
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
    }
}
Also used : ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) Encoder(org.apache.avro.io.Encoder) ResultCoder(org.apache.beam.sdk.io.gcp.bigquery.WritePartition.ResultCoder) Matcher(java.util.regex.Matcher) DoFnTester(org.apache.beam.sdk.transforms.DoFnTester) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FakeBigQueryServices(org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices) EnumSet(java.util.EnumSet) ValueProvider(org.apache.beam.sdk.options.ValueProvider) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) KvCoder(org.apache.beam.sdk.coders.KvCoder) Matchers.allOf(org.hamcrest.Matchers.allOf) Set(java.util.Set) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Serializable(java.io.Serializable) IncompatibleWindowException(org.apache.beam.sdk.transforms.windowing.IncompatibleWindowException) Assert.assertFalse(org.junit.Assert.assertFalse) AutoValue(com.google.auto.value.AutoValue) TestStream(org.apache.beam.sdk.testing.TestStream) Matchers.is(org.hamcrest.Matchers.is) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem) Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) KV(org.apache.beam.sdk.values.KV) 
FakeDatasetService(org.apache.beam.sdk.io.gcp.testing.FakeDatasetService) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) View(org.apache.beam.sdk.transforms.View) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Distinct(org.apache.beam.sdk.transforms.Distinct) Multimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap) TupleTag(org.apache.beam.sdk.values.TupleTag) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) StreamSupport(java.util.stream.StreamSupport) JavaFieldSchema(org.apache.beam.sdk.schemas.JavaFieldSchema) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) Result(org.apache.beam.sdk.io.gcp.bigquery.WriteTables.Result) Before(org.junit.Before) TableReference(com.google.api.services.bigquery.model.TableReference) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Files(java.nio.file.Files) PAssert(org.apache.beam.sdk.testing.PAssert) NonMergingWindowFn(org.apache.beam.sdk.transforms.windowing.NonMergingWindowFn) Parameter(org.junit.runners.Parameterized.Parameter) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) ShardedKeyCoder(org.apache.beam.sdk.coders.ShardedKeyCoder) Test(org.junit.Test) Schema(org.apache.beam.sdk.schemas.Schema) File(java.io.File) Assert.assertNull(org.junit.Assert.assertNull) Paths(java.nio.file.Paths) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) AtomicCoder(org.apache.beam.sdk.coders.AtomicCoder) 
DefaultSchema(org.apache.beam.sdk.schemas.annotations.DefaultSchema) FakeJobService(org.apache.beam.sdk.io.gcp.testing.FakeJobService) Assert.assertEquals(org.junit.Assert.assertEquals) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) TimePartitioning(com.google.api.services.bigquery.model.TimePartitioning) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) After(org.junit.After) TableRow(com.google.api.services.bigquery.model.TableRow) Assert.fail(org.junit.Assert.fail) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayListMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap) ShardedKey(org.apache.beam.sdk.values.ShardedKey) Parameterized(org.junit.runners.Parameterized) MapElements(org.apache.beam.sdk.transforms.MapElements) DatumWriter(org.apache.avro.io.DatumWriter) Collection(java.util.Collection) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) Description(org.junit.runner.Description) Collectors(java.util.stream.Collectors) List(java.util.List) Clustering(com.google.api.services.bigquery.model.Clustering) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) Matchers.equalTo(org.hamcrest.Matchers.equalTo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Pattern(java.util.regex.Pattern) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) Statement(org.junit.runners.model.Statement) TestRule(org.junit.rules.TestRule) Parameters(org.junit.runners.Parameterized.Parameters) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) SerializableFunctions(org.apache.beam.sdk.transforms.SerializableFunctions) 
StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SchemaUpdateOption(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption) WindowMappingFn(org.apache.beam.sdk.transforms.windowing.WindowMappingFn) SchemaCreate(org.apache.beam.sdk.schemas.annotations.SchemaCreate) Job(com.google.api.services.bigquery.model.Job) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExpectedException(org.junit.rules.ExpectedException) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) OutputStream(java.io.OutputStream) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) GenericRecord(org.apache.avro.generic.GenericRecord) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) Matchers(org.hamcrest.Matchers) PCollection(org.apache.beam.sdk.values.PCollection) Table(com.google.api.services.bigquery.model.Table) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Collections(java.util.Collections) JobConfigurationLoad(com.google.api.services.bigquery.model.JobConfigurationLoad) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) GenericData(org.apache.avro.generic.GenericData) Test(org.junit.Test)

Example 7 with Write

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write in project beam by apache.

the class BigQuerySchemaUpdateOptionsIT method runWriteTest.

/**
 * Writes a single row to a BigQuery table with the given {@code SchemaUpdateOption} set, then
 * queries the table and compares the query output with the expected rows.
 *
 * <p>The row is written through {@code BigQueryIO.writeTableRows()} using
 * {@code CREATE_IF_NEEDED} and {@code WRITE_APPEND}. After the pipeline has finished, the test
 * query is run and every cell of every returned row is converted to its string form before the
 * final comparison.
 *
 * @param schemaUpdateOptions The SchemaUpdateOption set to use
 * @param tableName The table to write to
 * @param schema The schema to use for the table
 * @param rowToInsert The row to insert
 * @param testQuery A testing SQL query to run after writing the row
 * @param expectedResult The expected result of the query as a nested list of column values (one
 *     list per result row)
 * @throws Exception if setting up, running, or querying fails
 */
private void runWriteTest(Set<SchemaUpdateOption> schemaUpdateOptions, String tableName, TableSchema schema, TableRow rowToInsert, String testQuery, List<List<String>> expectedResult) throws Exception {
    // Pipeline options, with temp files placed under the configured temp root.
    Options pipelineOptions = TestPipeline.testingPipelineOptions().as(Options.class);
    pipelineOptions.setTempLocation(pipelineOptions.getTempRoot() + "/bq_it_temp");
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    // Source: exactly one element, the row under test.
    Create.Values<TableRow> singleRow = Create.<TableRow>of(rowToInsert);

    // Sink: "<project>:<dataset>.<table>" with the schema update options under test.
    String tableSpec = String.format("%s:%s.%s", pipelineOptions.getProject(), BIG_QUERY_DATASET_ID, tableName);
    Write<TableRow> bigQueryWrite =
        BigQueryIO.writeTableRows()
            .to(tableSpec)
            .withSchema(schema)
            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
            .withSchemaUpdateOptions(schemaUpdateOptions);

    pipeline.apply(singleRow).apply(bigQueryWrite);
    pipeline.run().waitUntilFinish();

    // Read back through the test query and flatten each row into its cell values as strings.
    QueryResponse queryResponse = BQ_CLIENT.queryWithRetries(testQuery, project);
    List<List<String>> actualResult =
        queryResponse.getRows().stream()
            .map(
                resultRow ->
                    resultRow.getF().stream()
                        .map(field -> field.getV().toString())
                        .collect(Collectors.toList()))
            .collect(Collectors.toList());

    assertEquals(expectedResult, actualResult);
}
Also used : Arrays(java.util.Arrays) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) BeforeClass(org.junit.BeforeClass) RunWith(org.junit.runner.RunWith) LoggerFactory(org.slf4j.LoggerFactory) SecureRandom(java.security.SecureRandom) SchemaUpdateOption(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.SchemaUpdateOption) Create(org.apache.beam.sdk.transforms.Create) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableRow(com.google.api.services.bigquery.model.TableRow) TableSchema(com.google.api.services.bigquery.model.TableSchema) Pipeline(org.apache.beam.sdk.Pipeline) EnumSet(java.util.EnumSet) BigqueryClient(org.apache.beam.sdk.io.gcp.testing.BigqueryClient) QueryResponse(com.google.api.services.bigquery.model.QueryResponse) TableReference(com.google.api.services.bigquery.model.TableReference) AfterClass(org.junit.AfterClass) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Logger(org.slf4j.Logger) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Set(java.util.Set) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Collectors(java.util.stream.Collectors) Table(com.google.api.services.bigquery.model.Table) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) Assert.assertEquals(org.junit.Assert.assertEquals) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Create(org.apache.beam.sdk.transforms.Create) TableRow(com.google.api.services.bigquery.model.TableRow) QueryResponse(com.google.api.services.bigquery.model.QueryResponse) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

TableRow (com.google.api.services.bigquery.model.TableRow)7 Table (com.google.api.services.bigquery.model.Table)6 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)6 TableReference (com.google.api.services.bigquery.model.TableReference)6 TableSchema (com.google.api.services.bigquery.model.TableSchema)6 EnumSet (java.util.EnumSet)6 List (java.util.List)6 Set (java.util.Set)6 Collectors (java.util.stream.Collectors)6 Clustering (com.google.api.services.bigquery.model.Clustering)5 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)5 Job (com.google.api.services.bigquery.model.Job)5 JobConfigurationLoad (com.google.api.services.bigquery.model.JobConfigurationLoad)5 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)5 TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning)5 AutoValue (com.google.auto.value.AutoValue)5 File (java.io.File)5 IOException (java.io.IOException)5 InputStream (java.io.InputStream)5 OutputStream (java.io.OutputStream)5