Search in sources :

Example 1 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class BigQueryServicesImplTest method testCreateTableSucceedsAlreadyExists.

/**
 * Verifies that {@code tryCreateTable} returns {@code null} rather than failing when the
 * service answers with HTTP 409 (table already exists).
 */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
    // Describe the table the test attempts to create.
    TableFieldSchema firstColumn = new TableFieldSchema().setName("column1").setType("String");
    TableFieldSchema secondColumn = new TableFieldSchema().setName("column2").setType("Integer");
    TableSchema tableSchema = new TableSchema().setFields(ImmutableList.of(firstColumn, secondColumn));
    TableReference tableRef = new TableReference()
        .setProjectId("project")
        .setDatasetId("dataset")
        .setTableId("table");
    Table tableToCreate = new Table().setTableReference(tableRef).setSchema(tableSchema);

    // Status 409 signals that the table already exists.
    // NOTE(review): `response` and `bigquery` are presumably mock fields of the test class - confirm.
    when(response.getStatusCode()).thenReturn(409);

    BigQueryServicesImpl.DatasetServiceImpl datasetService =
        new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    Table created = datasetService.tryCreateTable(
        tableToCreate, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

    assertNull(created);

    // Each accessor on the response must have been invoked exactly once.
    verify(response, times(1)).getStatusCode();
    verify(response, times(1)).getContent();
    verify(response, times(1)).getContentType();
}
Also used : RetryBoundedBackOff(com.google.cloud.hadoop.util.RetryBoundedBackOff) TableReference(com.google.api.services.bigquery.model.TableReference) Table(com.google.api.services.bigquery.model.Table) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 2 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class FakeJobService method runLoadJob.

/**
 * Executes a fake load job: validates the job configuration, creates the destination table,
 * reads every source file registered for the job and inserts the rows into the destination.
 *
 * @param jobRef identifies the job; its project id and job id are the lookup key for the
 *     source files in {@code filesForLoadJobs}
 * @param load the load configuration (destination table, schema, dispositions, source format)
 * @return a status in state {@code "FAILED"} (with an empty {@link ErrorProto}) when
 *     {@code validateDispositions} rejects the create/write dispositions for the current
 *     table, otherwise a status in state {@code "DONE"}
 * @throws IllegalArgumentException if the source format is missing or is anything other than
 *     {@code NEWLINE_DELIMITED_JSON}
 * @throws InterruptedException declared for the dataset-service calls made here
 * @throws IOException declared for the dataset-service and file-reading calls made here
 */
private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load) throws InterruptedException, IOException {
    TableReference destination = load.getDestinationTable();
    TableSchema schema = load.getSchema();
    List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
    WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
    CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
    // Constant-first comparison so an unset source format is reported as an invalid argument
    // (with the offending value in the message) instead of surfacing as a bare NullPointerException.
    String sourceFormat = load.getSourceFormat();
    checkArgument("NEWLINE_DELIMITED_JSON".equals(sourceFormat), "Unsupported source format: %s", sourceFormat);
    Table existingTable = datasetService.getTable(destination);
    if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
        return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
    }
    // NOTE(review): createTable is called even when existingTable is non-null; this presumably
    // relies on the dataset service tolerating an existing table - confirm.
    datasetService.createTable(new Table().setTableReference(destination).setSchema(schema));
    List<TableRow> rows = Lists.newArrayList();
    for (ResourceId filename : sourceFiles) {
        rows.addAll(readRows(filename.toString()));
    }
    datasetService.insertAll(destination, rows, null);
    return new JobStatus().setState("DONE");
}
Also used : JobStatus(com.google.api.services.bigquery.model.JobStatus) TableReference(com.google.api.services.bigquery.model.TableReference) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) TableSchema(com.google.api.services.bigquery.model.TableSchema) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) TableRow(com.google.api.services.bigquery.model.TableRow) WriteDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition)

Example 3 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class FakeJobService method writeRows.

/**
 * Splits {@code rows} into consecutive batches of at most five rows and hands each batch to
 * {@code writeRowsHelper} together with an increasing shard index starting at 0.
 *
 * @param tableId table identifier passed to the Avro schema conversion
 * @param rows rows to write, processed in list order
 * @param schema table schema whose fields are converted to an Avro schema
 * @param destinationPattern forwarded unchanged to {@code writeRowsHelper}
 * @return the number of batches (shards) handed to {@code writeRowsHelper}; 0 for an empty list
 * @throws IOException declared for the calls made here
 */
private long writeRows(String tableId, List<TableRow> rows, TableSchema schema, String destinationPattern) throws IOException {
    final int rowsPerShard = 5;
    Schema avroSchema = BigQueryAvroUtils.toGenericAvroSchema(tableId, schema.getFields());
    int shardCount = 0;
    for (int start = 0; start < rows.size(); start += rowsPerShard) {
        int end = Math.min(start + rowsPerShard, rows.size());
        // Copy the slice so the helper receives an independent list, not a view of `rows`.
        List<TableRow> batch = Lists.newArrayList(rows.subList(start, end));
        writeRowsHelper(batch, avroSchema, destinationPattern, shardCount);
        shardCount++;
    }
    return shardCount;
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) Schema(org.apache.avro.Schema) TableRow(com.google.api.services.bigquery.model.TableRow)

Example 4 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class BigQueryTornadoes method main.

/**
 * Entry point: builds a pipeline that reads from the configured BigQuery input, applies
 * {@code CountTornadoes}, and writes the result to the configured BigQuery output table.
 *
 * @param args command-line arguments parsed (with validation) into {@code Options}
 */
public static void main(String[] args) {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    // Output table schema: two INTEGER columns, "month" and "tornado_count".
    List<TableFieldSchema> outputColumns = new ArrayList<>();
    outputColumns.add(new TableFieldSchema().setName("month").setType("INTEGER"));
    outputColumns.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
    TableSchema outputSchema = new TableSchema().setFields(outputColumns);

    pipeline
        .apply(BigQueryIO.read().from(options.getInput()))
        .apply(new CountTornadoes())
        .apply(
            BigQueryIO.writeTableRows()
                .to(options.getOutput())
                .withSchema(outputSchema)
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

    // Run the pipeline and block until it has finished.
    pipeline.run().waitUntilFinish();
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Pipeline(org.apache.beam.sdk.Pipeline)

Example 5 with TableSchema

use of com.google.api.services.bigquery.model.TableSchema in project beam by apache.

the class TriggerExample method getSchema.

/**
 * Builds the BigQuery schema used for the output.
 *
 * @return a schema with ten fields, in the order listed in the column table below
 */
private static TableSchema getSchema() {
    // Each entry is {column name, BigQuery type}; the order here is the field order in the schema.
    String[][] columns = {
        {"trigger_type", "STRING"},
        {"freeway", "STRING"},
        {"total_flow", "INTEGER"},
        {"number_of_records", "INTEGER"},
        {"window", "STRING"},
        {"isFirst", "BOOLEAN"},
        {"isLast", "BOOLEAN"},
        {"timing", "STRING"},
        {"event_time", "TIMESTAMP"},
        {"processing_time", "TIMESTAMP"},
    };
    List<TableFieldSchema> fields = new ArrayList<>();
    for (String[] column : columns) {
        fields.add(new TableFieldSchema().setName(column[0]).setType(column[1]));
    }
    return new TableSchema().setFields(fields);
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Aggregations

TableSchema (com.google.api.services.bigquery.model.TableSchema): 31, TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 20, TableRow (com.google.api.services.bigquery.model.TableRow): 18, JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema): 13, Test (org.junit.Test): 13, TableReference (com.google.api.services.bigquery.model.TableReference): 12, Pipeline (org.apache.beam.sdk.Pipeline): 12, ArrayList (java.util.ArrayList): 10, BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 9, TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 8, Table (com.google.api.services.bigquery.model.Table): 7, BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference): 7, PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 7, HashBasedTable (com.google.common.collect.HashBasedTable): 6, JobStatus (com.google.api.services.bigquery.model.JobStatus): 5, JobStatistics (com.google.api.services.bigquery.model.JobStatistics): 4, JobStatistics4 (com.google.api.services.bigquery.model.JobStatistics4): 4, Path (java.nio.file.Path): 4, Map (java.util.Map): 4, Job (com.google.api.services.bigquery.model.Job): 3