Search in sources :

Example 6 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testClusteringTableFunction.

/**
 * Writes two rows through a per-element table function whose destinations carry day-based time
 * partitioning and clustering on the "date" field, then checks that the table created for the
 * row with number "1" has the expected schema, time partitioning and clustering.
 */
@Test
public void testClusteringTableFunction() throws Exception {
    TableRow firstRow = new TableRow().set("date", "2018-01-01").set("number", "1");
    TableRow secondRow = new TableRow().set("date", "2018-01-02").set("number", "2");

    // Expected table metadata, compared against the created table at the end.
    TableFieldSchema dateField = new TableFieldSchema().setName("date").setType("DATE");
    TableFieldSchema numberField = new TableFieldSchema().setName("number").setType("INTEGER");
    TableSchema expectedSchema = new TableSchema().setFields(ImmutableList.of(dateField, numberField));
    TimePartitioning expectedPartitioning = new TimePartitioning().setType("DAY").setField("date");
    Clustering expectedClustering = new Clustering().setFields(ImmutableList.of("date"));

    // withMethod overrides the pipeline option, so the write method matching the
    // current test flags has to be requested explicitly.
    final BigQueryIO.Write.Method writeMethod;
    if (!useStorageApi) {
        writeMethod = Method.FILE_LOADS;
    } else if (useStorageApiApproximate) {
        writeMethod = Method.STORAGE_API_AT_LEAST_ONCE;
    } else {
        writeMethod = Method.STORAGE_WRITE_API;
    }

    BigQueryIO.Write<TableRow> write =
        BigQueryIO.writeTableRows()
            .to((ValueInSingleWindow<TableRow> element) -> {
                // One table per distinct "number" value; the partitioning and clustering
                // objects are built inside the function for every destination.
                String destinationSpec = "project-id:dataset-id.table-" + element.getValue().get("number");
                TimePartitioning partitioning = new TimePartitioning().setType("DAY").setField("date");
                Clustering destinationClustering = new Clustering().setFields(ImmutableList.of("date"));
                return new TableDestination(destinationSpec, null, partitioning, destinationClustering);
            })
            .withTestServices(fakeBqServices)
            .withMethod(writeMethod)
            .withSchema(expectedSchema)
            .withClustering()
            .withoutValidation();

    p.apply(Create.of(firstRow, secondRow)).apply(write);
    p.run();

    Table createdTable =
        fakeDatasetService.getTable(BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
    assertEquals(expectedSchema, createdTable.getSchema());
    assertEquals(expectedPartitioning, createdTable.getTimePartitioning());
    assertEquals(expectedClustering, createdTable.getClustering());
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Clustering(com.google.api.services.bigquery.model.Clustering) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TimePartitioning(com.google.api.services.bigquery.model.TimePartitioning) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 7 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testTriggeredFileLoads.

/**
 * Streams 30 rows in three batches of ten, separated by one-minute processing-time advances,
 * into a FILE_LOADS write with a 30-second triggering frequency and two file shards, and checks
 * that every row ends up in the destination table.
 *
 * <p>Returns immediately without running the pipeline when the storage API is in use or when
 * streaming is not enabled.
 */
@Test
public void testTriggeredFileLoads() throws Exception {
    if (useStorageApi) {
        return;
    }
    if (!useStreaming) {
        return;
    }

    List<TableRow> rows = Lists.newArrayList();
    int next = 0;
    while (next < 30) {
        rows.add(new TableRow().set("number", next));
        ++next;
    }

    // addElements takes a first element plus varargs, hence the get(k) / subList(k + 1, ...) split.
    TestStream<TableRow> input =
        TestStream.create(TableRowJsonCoder.of())
            .addElements(rows.get(0), Iterables.toArray(rows.subList(1, 10), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(rows.get(10), Iterables.toArray(rows.subList(11, 20), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(rows.get(20), Iterables.toArray(rows.subList(21, 30), TableRow.class))
            .advanceWatermarkToInfinity();

    TableSchema numberOnlySchema =
        new TableSchema()
            .setFields(ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

    p.apply(input)
        .apply(
            BigQueryIO.writeTableRows()
                .to("project-id:dataset-id.table-id")
                .withSchema(numberOnlySchema)
                .withTestServices(fakeBqServices)
                .withTriggeringFrequency(Duration.standardSeconds(30))
                .withNumFileShards(2)
                .withMethod(Method.FILE_LOADS)
                .withoutValidation());
    p.run();

    TableRow[] expectedRows = Iterables.toArray(rows, TableRow.class);
    assertThat(
        fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
        containsInAnyOrder(expectedRows));
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 8 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testSchemaWriteLoads.

/**
 * Writes four {@code SchemaPojo} elements using the Beam schema of the element type and checks
 * that the destination table contains one row per element, with the "name" and "number"
 * values rendered as strings.
 */
@Test
public void testSchemaWriteLoads() throws Exception {
    // withMethod overrides the pipeline option, so when the storage API is under test we
    // need to explicitly request STORAGE_WRITE_API (or its at-least-once variant).
    BigQueryIO.Write.Method method = useStorageApi ? (useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API) : Method.FILE_LOADS;
    p.apply(Create.of(new SchemaPojo("a", 1), new SchemaPojo("b", 2), new SchemaPojo("c", 3), new SchemaPojo("d", 4)))
        .apply(
            BigQueryIO.<SchemaPojo>write()
                .to("project-id:dataset-id.table-id")
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withMethod(method)
                .useBeamSchema()
                .withTestServices(fakeBqServices)
                .withoutValidation());
    p.run();
    // No separate dump of the table contents: on a mismatch the assertion below already
    // reports the actual rows, so the table is fetched exactly once.
    assertThat(
        fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
        containsInAnyOrder(
            new TableRow().set("name", "a").set("number", "1"),
            new TableRow().set("name", "b").set("number", "2"),
            new TableRow().set("name", "c").set("number", "3"),
            new TableRow().set("name", "d").set("number", "4")));
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) Test(org.junit.Test)

Aggregations

Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write)8 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)8 TableRow (com.google.api.services.bigquery.model.TableRow)7 Test (org.junit.Test)7 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)6 TableSchema (com.google.api.services.bigquery.model.TableSchema)6 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)3 Table (com.google.api.services.bigquery.model.Table)2 Clustering (com.google.api.services.bigquery.model.Clustering)1 TableReference (com.google.api.services.bigquery.model.TableReference)1 TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning)1 ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow)1