Search in sources :

Example 1 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testWriteToTableDecorator.

@Test
public void testWriteToTableDecorator() throws Exception {
    TableRow row1 = new TableRow().set("name", "a").set("number", "1");
    TableRow row2 = new TableRow().set("name", "b").set("number", "2");
    // withMethod overrides the pipeline option, so we need to explicitly request
    // STORAGE_API_WRITES.
    BigQueryIO.Write.Method method =
        useStorageApi
            ? (useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API)
            : Method.STREAMING_INSERTS;
    TableSchema schema =
        new TableSchema().setFields(ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER")));
    p.apply(Create.of(row1, row2))
        .apply(BigQueryIO.writeTableRows()
            .to("project-id:dataset-id.table-id$20171127")
            .withTestServices(fakeBqServices)
            .withMethod(method)
            .withSchema(schema)
            .withoutValidation());
    p.run();
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)
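
For context, a minimal standalone sketch of the same pattern outside the test harness might look like the following. The project, dataset, and table names are placeholders, and the $20171127 decorator addresses a single daily partition, just as in the test above.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class PartitionDecoratorWriteSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        TableSchema schema = new TableSchema().setFields(Arrays.asList(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER")));
        p.apply(Create.of(
                new TableRow().set("name", "a").set("number", "1"),
                new TableRow().set("name", "b").set("number", "2")))
            // The $YYYYMMDD suffix targets one daily partition of the table.
            .apply(BigQueryIO.writeTableRows()
                .to("my-project:my_dataset.my_table$20171127")
                .withSchema(schema)
                // Pin the insertion path explicitly rather than relying on pipeline options.
                .withMethod(Method.STREAMING_INSERTS));
        p.run().waitUntilFinish();
    }
}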

Example 2 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testTimePartitioning.

@Test
public void testTimePartitioning() throws Exception {
    BigQueryIO.Write.Method method;
    if (useStorageApi) {
        method = useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API;
    } else if (useStreaming) {
        method = Method.STREAMING_INSERTS;
    } else {
        method = Method.FILE_LOADS;
    }
    testTimePartitioning(method);
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) Test(org.junit.Test)
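
The testTimePartitioning(method) helper that the test delegates to is not shown on this page. As a rough, hedged sketch of what a time-partitioned write with an explicit method can look like (the table name, field, and expiration below are illustrative, not taken from the test):

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.model.TimePartitioning;
import java.util.Arrays;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;

public class TimePartitionedWriteSketch {
    // Builds a write to an ingestion-time day-partitioned table using the given method.
    static BigQueryIO.Write<TableRow> partitionedWrite(Method method) {
        TableSchema schema = new TableSchema().setFields(Arrays.asList(
            new TableFieldSchema().setName("number").setType("INTEGER")));
        return BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.partitioned_table")
            .withSchema(schema)
            .withMethod(method)
            // Partition by day and expire partitions after roughly 30 days.
            .withTimePartitioning(new TimePartitioning()
                .setType("DAY")
                .setExpirationMs(30L * 24 * 60 * 60 * 1000));
    }
}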

Example 3 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testTriggeredFileLoadsWithTempTablesAndDataset.

@Test
public void testTriggeredFileLoadsWithTempTablesAndDataset() throws Exception {
    String tableRef = "bigquery-project-id:dataset-id.table-id";
    List<TableRow> elements = Lists.newArrayList();
    for (int i = 0; i < 30; ++i) {
        elements.add(new TableRow().set("number", i));
    }
    TestStream<TableRow> testStream =
        TestStream.create(TableRowJsonCoder.of())
            .addElements(elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
            .advanceWatermarkToInfinity();
    BigQueryIO.Write.Method method = Method.FILE_LOADS;
    p.apply(testStream)
        .apply(BigQueryIO.writeTableRows()
            .to(tableRef)
            .withSchema(new TableSchema().setFields(ImmutableList.of(
                new TableFieldSchema().setName("number").setType("INTEGER"))))
            .withTestServices(fakeBqServices)
            .withTriggeringFrequency(Duration.standardSeconds(30))
            .withNumFileShards(2)
            .withMaxBytesPerPartition(1)
            .withMaxFilesPerPartition(1)
            .withMethod(method)
            .withoutValidation()
            .withWriteTempDataset("temp-dataset-id"));
    p.run();
    final int projectIdSplitter = tableRef.indexOf(':');
    final String projectId = projectIdSplitter == -1 ? "project-id" : tableRef.substring(0, projectIdSplitter);
    assertThat(fakeDatasetService.getAllRows(projectId, "dataset-id", "table-id"), containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)
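
Stripped of the test plumbing (withTestServices, withoutValidation, and the tiny withMaxBytesPerPartition/withMaxFilesPerPartition limits that force multiple load jobs through temp tables), the user-facing configuration for triggered file loads with a dedicated temp dataset reduces to roughly the sketch below; the table, dataset, triggering frequency, and shard count are placeholders.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;
import org.joda.time.Duration;

public class TriggeredFileLoadsSketch {
    static BigQueryIO.Write<TableRow> triggeredFileLoads() {
        return BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.my_table")
            .withSchema(new TableSchema().setFields(Arrays.asList(
                new TableFieldSchema().setName("number").setType("INTEGER"))))
            // FILE_LOADS over an unbounded input needs a triggering frequency
            // and a file shard count.
            .withMethod(Method.FILE_LOADS)
            .withTriggeringFrequency(Duration.standardMinutes(5))
            .withNumFileShards(10)
            // Stage the intermediate load tables in a separate dataset.
            .withWriteTempDataset("my_temp_dataset");
    }
}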

Example 4 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testTriggeredFileLoadsWithTempTables.

public void testTriggeredFileLoadsWithTempTables(String tableRef) throws Exception {
    if (useStorageApi || !useStreaming) {
        return;
    }
    List<TableRow> elements = Lists.newArrayList();
    for (int i = 0; i < 30; ++i) {
        elements.add(new TableRow().set("number", i));
    }
    TestStream<TableRow> testStream =
        TestStream.create(TableRowJsonCoder.of())
            .addElements(elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
            .advanceWatermarkToInfinity();
    BigQueryIO.Write.Method method = Method.FILE_LOADS;
    p.apply(testStream)
        .apply(BigQueryIO.writeTableRows()
            .to(tableRef)
            .withSchema(new TableSchema().setFields(ImmutableList.of(
                new TableFieldSchema().setName("number").setType("INTEGER"))))
            .withTestServices(fakeBqServices)
            .withTriggeringFrequency(Duration.standardSeconds(30))
            .withNumFileShards(2)
            .withMaxBytesPerPartition(1)
            .withMaxFilesPerPartition(1)
            .withMethod(method)
            .withoutValidation());
    p.run();
    final int projectIdSplitter = tableRef.indexOf(':');
    final String projectId = projectIdSplitter == -1 ? "project-id" : tableRef.substring(0, projectIdSplitter);
    assertThat(fakeDatasetService.getAllRows(projectId, "dataset-id", "table-id"), containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)
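
A side note on the project-id derivation above: instead of splitting the table spec on ':' by hand, the same value can be recovered with BigQueryHelpers.parseTableSpec (used in the next example), falling back to a default when the spec carries no project. A small sketch, assuming the helper name and default below:

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

public class TableSpecProjectSketch {
    // Returns the project encoded in a table spec, or defaultProject if the spec has none.
    static String projectOf(String tableSpec, String defaultProject) {
        TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
        return ref.getProjectId() != null ? ref.getProjectId() : defaultProject;
    }
}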

Example 5 with Method

use of org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method in project beam by apache.

the class BigQueryIOWriteTest method testCreateNever.

@Test
public void testCreateNever() throws Exception {
    BigQueryIO.Write.Method method =
        useStreaming
            ? (useStorageApi
                ? (useStorageApiApproximate ? Method.STORAGE_API_AT_LEAST_ONCE : Method.STORAGE_WRITE_API)
                : Method.STREAMING_INSERTS)
            : useStorageApi ? Method.STORAGE_WRITE_API : Method.FILE_LOADS;
    p.enableAbandonedNodeEnforcement(false);
    TableReference tableRef = BigQueryHelpers.parseTableSpec("project-id:dataset-id.table");
    TableSchema tableSchema =
        new TableSchema().setFields(ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER")));
    fakeDatasetService.createTable(new Table().setTableReference(tableRef).setSchema(tableSchema));
    PCollection<TableRow> tableRows =
        p.apply(GenerateSequence.from(0).to(10))
            .apply(MapElements.via(new SimpleFunction<Long, TableRow>() {

        @Override
        public TableRow apply(Long input) {
            return new TableRow().set("name", "name " + input).set("number", input);
        }
    })).setCoder(TableRowJsonCoder.of());
    tableRows.apply(BigQueryIO.writeTableRows()
        .to(tableRef)
        .withMethod(method)
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
        .withTestServices(fakeBqServices)
        .withoutValidation());
    p.run();
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableReference(com.google.api.services.bigquery.model.TableReference) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)
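
For reference, outside the fake-services harness a CREATE_NEVER write typically pairs with an explicit write disposition and omits the schema, since the destination table must already exist. A minimal sketch; the table name and method choice are placeholders:

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;

public class AppendToExistingTableSketch {
    static BigQueryIO.Write<TableRow> appendOnly() {
        return BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.existing_table")
            // The table is expected to exist already, so no schema is supplied.
            .withCreateDisposition(CreateDisposition.CREATE_NEVER)
            .withWriteDisposition(WriteDisposition.WRITE_APPEND)
            .withMethod(Method.STORAGE_WRITE_API);
    }
}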

Aggregations

Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write): 8
Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method): 8
TableRow (com.google.api.services.bigquery.model.TableRow): 7
Test (org.junit.Test): 7
TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 6
TableSchema (com.google.api.services.bigquery.model.TableSchema): 6
BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 3
Table (com.google.api.services.bigquery.model.Table): 2
Clustering (com.google.api.services.bigquery.model.Clustering): 1
TableReference (com.google.api.services.bigquery.model.TableReference): 1
TimePartitioning (com.google.api.services.bigquery.model.TimePartitioning): 1
ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow): 1