Search in sources :

Example 46 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryTableRowIterator method getTypedTableRow.

/**
   * Converts a row returned from the BigQuery JSON API as a {@code Map<String, Object>} into a
   * Java {@link TableRow} with nested {@link TableCell TableCells}. The {@code Object} values in
   * the cells are converted to Java types according to the provided field schemas.
   *
   * <p>See {@link #getTypedCellValue(TableFieldSchema, Object)} for details on how BigQuery
   * types are mapped to Java types.
   */
private TableRow getTypedTableRow(List<TableFieldSchema> fields, Map<String, Object> rawRow) {
    // If rawRow is a TableRow, use it. If not, create a new one.
    TableRow row;
    List<? extends Map<String, Object>> cells;
    if (rawRow instanceof TableRow) {
        // Since rawRow is a TableRow it already has TableCell objects in setF. We do not need to do
        // any type conversion, but extract the cells for cell-wise processing below.
        row = (TableRow) rawRow;
        cells = row.getF();
        // Clear the cells from the row, so that row.getF() will return null. This matches the
        // behavior of rows produced by the BigQuery export API used on the service.
        row.setF(null);
    } else {
        row = new TableRow();
        // Since rawRow is a Map<String, Object> we use Map.get("f") instead of TableRow.getF() to
        // get its cells. Similarly, when rawCell is a Map<String, Object> instead of a TableCell,
        // we will use Map.get("v") instead of TableCell.getV() get its value.
        @SuppressWarnings("unchecked") List<? extends Map<String, Object>> rawCells = (List<? extends Map<String, Object>>) rawRow.get("f");
        cells = rawCells;
    }
    checkState(cells.size() == fields.size(), "Expected that the row has the same number of cells %s as fields in the schema %s", cells.size(), fields.size());
    // Loop through all the fields in the row, normalizing their types with the TableFieldSchema
    // and storing the normalized values by field name in the Map<String, Object> that
    // underlies the TableRow.
    Iterator<? extends Map<String, Object>> cellIt = cells.iterator();
    Iterator<TableFieldSchema> fieldIt = fields.iterator();
    while (cellIt.hasNext()) {
        Map<String, Object> cell = cellIt.next();
        TableFieldSchema fieldSchema = fieldIt.next();
        // Convert the object in this cell to the Java type corresponding to its type in the schema.
        Object convertedValue = getTypedCellValue(fieldSchema, cell.get("v"));
        String fieldName = fieldSchema.getName();
        checkArgument(!RESERVED_FIELD_NAMES.contains(fieldName), "BigQueryIO does not support records with columns named %s", fieldName);
        if (convertedValue == null) {
            // intentionally omits columns with null values.
            continue;
        }
        row.set(fieldName, convertedValue);
    }
    return row;
}
Also used : TableRow(com.google.api.services.bigquery.model.TableRow) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) TableDataList(com.google.api.services.bigquery.model.TableDataList) Map(java.util.Map) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 47 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryIOIT method testWrite.

private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
    Pipeline pipeline = Pipeline.create(options);
    BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
    pipeline.apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions))).apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName))).apply("Map records", ParDo.of(new MapKVToV())).apply("Write to BQ", writeIO.to(tableQualifier).withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot)).withMethod(method).withSchema(new TableSchema().setFields(Collections.singletonList(new TableFieldSchema().setName("data").setType("BYTES")))));
    PipelineResult pipelineResult = pipeline.run();
    pipelineResult.waitUntilFinish();
    extractAndPublishTime(pipelineResult, metricName);
}
Also used : SyntheticBoundedSource(org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) TableSchema(com.google.api.services.bigquery.model.TableSchema) PipelineResult(org.apache.beam.sdk.PipelineResult) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Pipeline(org.apache.beam.sdk.Pipeline)

Example 48 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryUtilsTest method testToTableSchema_map.

@Test
public void testToTableSchema_map() {
    TableSchema schema = toTableSchema(MAP_MAP_TYPE);
    assertThat(schema.getFields().size(), equalTo(1));
    TableFieldSchema field = schema.getFields().get(0);
    assertThat(field.getName(), equalTo("map"));
    assertThat(field.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
    assertThat(field.getMode(), equalTo(Mode.REPEATED.toString()));
    assertThat(field.getFields(), containsInAnyOrder(MAP_KEY, MAP_VALUE));
}
Also used : BigQueryUtils.toTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.toTableSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 49 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryUtilsTest method testToTableSchema_array_row.

@Test
public void testToTableSchema_array_row() {
    TableSchema schema = toTableSchema(ARRAY_ROW_TYPE);
    assertThat(schema.getFields().size(), equalTo(1));
    TableFieldSchema field = schema.getFields().get(0);
    assertThat(field.getName(), equalTo("rows"));
    assertThat(field.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
    assertThat(field.getMode(), equalTo(Mode.REPEATED.toString()));
    assertThat(field.getFields(), containsInAnyOrder(ID, VALUE, NAME, TIMESTAMP_VARIANT1, TIMESTAMP_VARIANT2, TIMESTAMP_VARIANT3, TIMESTAMP_VARIANT4, DATETIME, DATETIME_0MS, DATETIME_0S_NS, DATETIME_0S_0NS, DATE, TIME, TIME_0MS, TIME_0S_NS, TIME_0S_0NS, VALID, BINARY, NUMERIC, BOOLEAN, LONG, DOUBLE));
}
Also used : BigQueryUtils.toTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils.toTableSchema) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Test(org.junit.Test)

Example 50 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryHllSketchCompatibilityIT method prepareDatasetAndDataTables.

@BeforeClass
public static void prepareDatasetAndDataTables() throws Exception {
    BIGQUERY_CLIENT.createNewDataset(PROJECT_ID, DATASET_ID);
    TableSchema dataTableSchema = new TableSchema().setFields(Collections.singletonList(new TableFieldSchema().setName(DATA_FIELD_NAME).setType(DATA_FIELD_TYPE)));
    Table dataTableNonEmpty = new Table().setSchema(dataTableSchema).setTableReference(new TableReference().setProjectId(PROJECT_ID).setDatasetId(DATASET_ID).setTableId(DATA_TABLE_ID_NON_EMPTY));
    BIGQUERY_CLIENT.createNewTable(PROJECT_ID, DATASET_ID, dataTableNonEmpty);
    // Prepopulates dataTableNonEmpty with TEST_DATA
    List<Map<String, Object>> rows = TEST_DATA.stream().map(v -> Collections.singletonMap(DATA_FIELD_NAME, (Object) v)).collect(Collectors.toList());
    BIGQUERY_CLIENT.insertDataToTable(PROJECT_ID, DATASET_ID, DATA_TABLE_ID_NON_EMPTY, rows);
    Table dataTableEmpty = new Table().setSchema(dataTableSchema).setTableReference(new TableReference().setProjectId(PROJECT_ID).setDatasetId(DATASET_ID).setTableId(DATA_TABLE_ID_EMPTY));
    BIGQUERY_CLIENT.createNewTable(PROJECT_ID, DATASET_ID, dataTableEmpty);
}
Also used : Arrays(java.util.Arrays) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) BigqueryMatcher.queryResultHasChecksum(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.queryResultHasChecksum) Date(java.util.Date) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) SchemaAndRecord(org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord) ByteBuffer(java.nio.ByteBuffer) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TableRow(com.google.api.services.bigquery.model.TableRow) TableSchema(com.google.api.services.bigquery.model.TableSchema) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) BigqueryClient(org.apache.beam.sdk.io.gcp.testing.BigqueryClient) TableReference(com.google.api.services.bigquery.model.TableReference) AfterClass(org.junit.AfterClass) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) PAssert(org.apache.beam.sdk.testing.PAssert) BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) ApplicationNameOptions(org.apache.beam.sdk.options.ApplicationNameOptions) Method(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) Table(com.google.api.services.bigquery.model.Table) DataFormat(com.google.cloud.bigquery.storage.v1.DataFormat) List(java.util.List) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) BigqueryMatcher.createQueryUsingStandardSql(org.apache.beam.sdk.io.gcp.testing.BigqueryMatcher.createQueryUsingStandardSql) Collections(java.util.Collections) TableReference(com.google.api.services.bigquery.model.TableReference) Table(com.google.api.services.bigquery.model.Table) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Map(java.util.Map) BeforeClass(org.junit.BeforeClass)

Aggregations

TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)80 TableSchema (com.google.api.services.bigquery.model.TableSchema)71 TableRow (com.google.api.services.bigquery.model.TableRow)56 Test (org.junit.Test)45 Table (com.google.api.services.bigquery.model.Table)25 TableReference (com.google.api.services.bigquery.model.TableReference)23 ArrayList (java.util.ArrayList)17 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)16 List (java.util.List)15 Map (java.util.Map)15 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)13 Pipeline (org.apache.beam.sdk.Pipeline)12 ByteString (com.google.protobuf.ByteString)10 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)10 Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write)10 Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)10 BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference)9 FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices)9 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)8