
Example 56 with TableFieldSchema

Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

From class BigQueryRowCountIT, method testPipelineOptionInjection.

/**
 * This tests if the pipeline options are injected in the path of SQL Transform.
 */
@Test
public void testPipelineOptionInjection() {
    BigQueryTestTableProvider provider = new BigQueryTestTableProvider();
    Table table = getTable("testTable", bigQuery.tableSpec());
    provider.addTable("testTable", table);
    pipeline
        .apply(Create.of(
                new TableRow().set("id", 1).set("name", "name1"),
                new TableRow().set("id", 2).set("name", "name2"),
                new TableRow().set("id", 3).set("name", "name3"))
            .withCoder(TableRowJsonCoder.of()))
        .apply(BigQueryIO.writeTableRows()
            .to(bigQuery.tableSpec())
            .withSchema(new TableSchema().setFields(ImmutableList.of(
                new TableFieldSchema().setName("id").setType("INTEGER"),
                new TableFieldSchema().setName("name").setType("STRING"))))
            .withoutValidation());
    pipeline.run().waitUntilFinish();
    // changing pipeline options
    readingPipeline.getOptions().setJobName(FAKE_JOB_NAME);
    // Reading from the table should update the statistics of bigQuery table
    readingPipeline.apply(SqlTransform.query(" select * from testTable ").withDefaultTableProvider("bigquery", provider));
    readingPipeline.run().waitUntilFinish();
    BigQueryTestTable sqlTable = (BigQueryTestTable) provider.buildBeamSqlTable(table);
    assertEquals(FAKE_JOB_NAME, sqlTable.getJobName());
}
Also used : Table(org.apache.beam.sdk.extensions.sql.meta.Table) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)
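
The schema-building idiom shared by these examples can be pulled into a small helper. Below is a minimal, self-contained sketch of that idiom; the class and method names (SchemaSketch, idNameSchema) are illustrative and not part of the Beam tests.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;

class SchemaSketch {

    // Builds the two-column (id INTEGER, name STRING) schema used in the writes above.
    static TableSchema idNameSchema() {
        return new TableSchema()
            .setFields(ImmutableList.of(
                new TableFieldSchema().setName("id").setType("INTEGER"),
                new TableFieldSchema().setName("name").setType("STRING")));
    }
}

Note that the Beam tests themselves use a vendored ImmutableList; plain Guava is used here only to keep the sketch self-contained.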

Example 57 with TableFieldSchema

Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

From class BigQueryRowCountIT, method testNonEmptyTable.

@Test
public void testNonEmptyTable() {
    BigQueryTableProvider provider = new BigQueryTableProvider();
    Table table = getTable("testTable", bigQuery.tableSpec());
    pipeline
        .apply(Create.of(
                new TableRow().set("id", 1).set("name", "name1"),
                new TableRow().set("id", 2).set("name", "name2"),
                new TableRow().set("id", 3).set("name", "name3"))
            .withCoder(TableRowJsonCoder.of()))
        .apply(BigQueryIO.writeTableRows()
            .to(bigQuery.tableSpec())
            .withSchema(new TableSchema().setFields(ImmutableList.of(
                new TableFieldSchema().setName("id").setType("INTEGER"),
                new TableFieldSchema().setName("name").setType("STRING"))))
            .withoutValidation());
    pipeline.run().waitUntilFinish();
    BeamSqlTable sqlTable = provider.buildBeamSqlTable(table);
    BeamTableStatistics size1 = sqlTable.getTableStatistics(TestPipeline.testingPipelineOptions());
    assertNotNull(size1);
    assertEquals(3d, size1.getRowCount(), 0.1);
}
Also used : Table(org.apache.beam.sdk.extensions.sql.meta.Table) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) BeamTableStatistics(org.apache.beam.sdk.extensions.sql.impl.BeamTableStatistics) Test(org.junit.Test)

Example 58 with TableFieldSchema

Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

From class TableRowToStorageApiProto, method descriptorSchemaFromTableFieldSchemas.

private static DescriptorProto descriptorSchemaFromTableFieldSchemas(Iterable<TableFieldSchema> tableFieldSchemas) {
    DescriptorProto.Builder descriptorBuilder = DescriptorProto.newBuilder();
    // Create a unique name for the descriptor ('-' characters cannot be used).
    descriptorBuilder.setName("D" + UUID.randomUUID().toString().replace("-", "_"));
    int i = 1;
    for (TableFieldSchema fieldSchema : tableFieldSchemas) {
        fieldDescriptorFromTableField(fieldSchema, i++, descriptorBuilder);
    }
    return descriptorBuilder.build();
}
Also used : FieldDescriptorProto(com.google.protobuf.DescriptorProtos.FieldDescriptorProto) FileDescriptorProto(com.google.protobuf.DescriptorProtos.FileDescriptorProto) DescriptorProto(com.google.protobuf.DescriptorProtos.DescriptorProto) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)
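
fieldDescriptorFromTableField is referenced above but not shown. As a rough illustration of the kind of mapping it performs for scalar fields, here is a simplified, hypothetical sketch; the type mapping and the helper name addScalarField are assumptions, not the actual Beam implementation.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.protobuf.DescriptorProtos.DescriptorProto;
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;

class FieldMappingSketch {

    // Hypothetical, simplified mapping of one scalar TableFieldSchema to a proto field.
    static void addScalarField(TableFieldSchema fieldSchema, int number, DescriptorProto.Builder builder) {
        // Map only the two BigQuery types seen in these examples; real code handles many more.
        FieldDescriptorProto.Type type =
            "INTEGER".equals(fieldSchema.getType())
                ? FieldDescriptorProto.Type.TYPE_INT64
                : FieldDescriptorProto.Type.TYPE_STRING;
        builder.addField(
            FieldDescriptorProto.newBuilder()
                .setName(fieldSchema.getName().toLowerCase())
                .setNumber(number)
                .setType(type)
                .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
                .build());
    }
}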

Example 59 with TableFieldSchema

Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

From class BigQueryIOWriteTest, method testTriggeredFileLoadsWithTempTables.

public void testTriggeredFileLoadsWithTempTables(String tableRef) throws Exception {
    if (useStorageApi || !useStreaming) {
        return;
    }
    List<TableRow> elements = Lists.newArrayList();
    for (int i = 0; i < 30; ++i) {
        elements.add(new TableRow().set("number", i));
    }
    TestStream<TableRow> testStream =
        TestStream.create(TableRowJsonCoder.of())
            .addElements(elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
            .advanceProcessingTime(Duration.standardMinutes(1))
            .addElements(elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
            .advanceWatermarkToInfinity();
    BigQueryIO.Write.Method method = Method.FILE_LOADS;
    p.apply(testStream)
        .apply(BigQueryIO.writeTableRows()
            .to(tableRef)
            .withSchema(new TableSchema().setFields(ImmutableList.of(
                new TableFieldSchema().setName("number").setType("INTEGER"))))
            .withTestServices(fakeBqServices)
            .withTriggeringFrequency(Duration.standardSeconds(30))
            .withNumFileShards(2)
            .withMaxBytesPerPartition(1)
            .withMaxFilesPerPartition(1)
            .withMethod(method)
            .withoutValidation());
    p.run();
    final int projectIdSplitter = tableRef.indexOf(':');
    final String projectId = projectIdSplitter == -1 ? "project-id" : tableRef.substring(0, projectIdSplitter);
    assertThat(fakeDatasetService.getAllRows(projectId, "dataset-id", "table-id"), containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)
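
The test above derives the project id by splitting the table spec on ':' by hand. For reference, BigQueryHelpers.parseTableSpec returns the same information as a structured TableReference; the snippet below is a standalone illustration, not part of the test, and the spec string is only an example.

import com.google.api.services.bigquery.model.TableReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers;

class TableSpecSketch {

    public static void main(String[] args) {
        // Parses a "project:dataset.table" spec into its components.
        TableReference ref = BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id");
        System.out.println(ref.getProjectId());  // project-id
        System.out.println(ref.getDatasetId());  // dataset-id
        System.out.println(ref.getTableId());    // table-id
    }
}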

Example 60 with TableFieldSchema

Use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

From class BigQueryIOWriteTest, method testCreateNever.

@Test
public void testCreateNever() throws Exception {
    BigQueryIO.Write.Method method =
        useStreaming
            ? (useStorageApi
                ? (useStorageApiApproximate
                    ? Method.STORAGE_API_AT_LEAST_ONCE
                    : Method.STORAGE_WRITE_API)
                : Method.STREAMING_INSERTS)
            : (useStorageApi ? Method.STORAGE_WRITE_API : Method.FILE_LOADS);
    p.enableAbandonedNodeEnforcement(false);
    TableReference tableRef = BigQueryHelpers.parseTableSpec("project-id:dataset-id.table");
    TableSchema tableSchema =
        new TableSchema()
            .setFields(ImmutableList.of(
                new TableFieldSchema().setName("name").setType("STRING"),
                new TableFieldSchema().setName("number").setType("INTEGER")));
    fakeDatasetService.createTable(new Table().setTableReference(tableRef).setSchema(tableSchema));
    PCollection<TableRow> tableRows =
        p.apply(GenerateSequence.from(0).to(10))
            .apply(MapElements.via(
                new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                        return new TableRow().set("name", "name " + input).set("number", input);
                    }
                }))
            .setCoder(TableRowJsonCoder.of());
    tableRows.apply(BigQueryIO.writeTableRows()
        .to(tableRef)
        .withMethod(method)
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
        .withTestServices(fakeBqServices)
        .withoutValidation());
    p.run();
}
Also used : Write(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write) TableReference(com.google.api.services.bigquery.model.TableReference) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableRow(com.google.api.services.bigquery.model.TableRow) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)
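
The method-selection expression in testCreateNever packs three flags into one nested ternary. The sketch below spells out the same decision as explicit branches, purely as a readability aid; the class and method names here are illustrative.

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;

class MethodSelectionSketch {

    // Same selection logic as the nested ternary above, written as explicit branches.
    static BigQueryIO.Write.Method select(
            boolean useStreaming, boolean useStorageApi, boolean useStorageApiApproximate) {
        if (useStreaming) {
            if (useStorageApi) {
                return useStorageApiApproximate
                    ? Method.STORAGE_API_AT_LEAST_ONCE
                    : Method.STORAGE_WRITE_API;
            }
            return Method.STREAMING_INSERTS;
        }
        return useStorageApi ? Method.STORAGE_WRITE_API : Method.FILE_LOADS;
    }
}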

Aggregations

TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 80
TableSchema (com.google.api.services.bigquery.model.TableSchema): 71
TableRow (com.google.api.services.bigquery.model.TableRow): 56
Test (org.junit.Test): 45
Table (com.google.api.services.bigquery.model.Table): 25
TableReference (com.google.api.services.bigquery.model.TableReference): 23
ArrayList (java.util.ArrayList): 17
BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 16
List (java.util.List): 15
Map (java.util.Map): 15
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 14
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 13
Pipeline (org.apache.beam.sdk.Pipeline): 12
ByteString (com.google.protobuf.ByteString): 10
JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema): 10
Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write): 10
Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method): 10
BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference): 9
FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices): 9
ErrorProto (com.google.api.services.bigquery.model.ErrorProto): 8