Search in sources:

Example 6 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryIOTest method testWriteWithDynamicTables.

/**
 * Writes the integers 0..9 through {@code BigQueryIO.write()} using a table function that routes
 * each element to one of five tables chosen by {@code value % 5}, then checks that every table
 * was created with its own schema and holds exactly the two expected rows.
 *
 * @param streaming when {@code true}, the input collection is marked as unbounded before writing
 */
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
    BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
    bqOptions.setProject("defaultproject");
    bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());

    FakeDatasetService datasetService = new FakeDatasetService();
    datasetService.createDataset("project-id", "dataset-id", "", "");
    FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
        .withDatasetService(datasetService)
        .withJobService(new FakeJobService());

    List<Integer> elements = new ArrayList<>();
    for (int value = 0; value < 10; value++) {
        elements.add(value);
    }

    // Window the input into five different windows, keyed by the record value modulo 5.
    SerializableFunction<Integer, String> partitionFn = new SerializableFunction<Integer, String>() {

        @Override
        public String apply(Integer value) {
            return Integer.toString(value % 5);
        }
    };
    WindowFn<Integer, PartitionedGlobalWindow> windowFn = new PartitionedGlobalWindows(partitionFn);

    final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
    Map<String, String> schemas = Maps.newHashMap();
    for (int bucket = 0; bucket < 5; bucket++) {
        TableDestination destination = new TableDestination("project-id:dataset-id.table-id-" + bucket, "");
        targetTables.put(bucket, destination);
        // Give each target table its own custom schema: two shared columns plus "custom_<bucket>".
        TableSchema bucketSchema = new TableSchema().setFields(ImmutableList.of(
            new TableFieldSchema().setName("name").setType("STRING"),
            new TableFieldSchema().setName("number").setType("INTEGER"),
            new TableFieldSchema().setName("custom_" + bucket).setType("STRING")));
        schemas.put(destination.getTableSpec(), BigQueryHelpers.toJsonString(bucketSchema));
    }

    SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction = new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {

        @Override
        public TableDestination apply(ValueInSingleWindow<Integer> windowedValue) {
            int bucket = windowedValue.getValue() % 5;
            PartitionedGlobalWindow window = (PartitionedGlobalWindow) windowedValue.getWindow();
            // The element must be accessible here and must agree with the window it landed in.
            checkArgument(window.value.equals(Integer.toString(bucket)), "Incorrect element");
            return targetTables.get(bucket);
        }
    };

    SerializableFunction<Integer, TableRow> formatFunction = new SerializableFunction<Integer, TableRow>() {

        @Override
        public TableRow apply(Integer value) {
            return new TableRow().set("name", "number" + value).set("number", value);
        }
    };

    Pipeline p = TestPipeline.create(bqOptions);
    PCollection<Integer> input = p.apply("CreateSource", Create.of(elements));
    if (streaming) {
        input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    }
    PCollectionView<Map<String, String>> schemasView = p
        .apply("CreateSchemaMap", Create.of(schemas))
        .apply("ViewSchemaAsMap", View.<String, String>asMap());
    input
        .apply(Window.<Integer>into(windowFn))
        .apply(BigQueryIO.<Integer>write()
            .to(tableFunction)
            .withFormatFunction(formatFunction)
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withSchemaFromView(schemasView)
            .withTestServices(fakeBqServices)
            .withoutValidation());
    p.run();

    for (int bucket = 0; bucket < 5; ++bucket) {
        String tableId = String.format("table-id-%d", bucket);
        String tableSpec = String.format("project-id:dataset-id.%s", tableId);

        // The table must have been created with the schema registered for its table spec.
        TableReference tableRef = new TableReference()
            .setProjectId("project-id")
            .setDatasetId("dataset-id")
            .setTableId(tableId);
        String actualSchemaJson = BigQueryHelpers.toJsonString(datasetService.getTable(tableRef).getSchema());
        assertThat(actualSchemaJson, equalTo(schemas.get(tableSpec)));

        // The table must contain exactly the two inputs that map to this bucket.
        TableRow lowerRow = new TableRow().set("name", String.format("number%d", bucket)).set("number", bucket);
        TableRow upperRow = new TableRow().set("name", String.format("number%d", bucket + 5)).set("number", bucket + 5);
        assertThat(datasetService.getAllRows("project-id", "dataset-id", tableId), containsInAnyOrder(lowerRow, upperRow));
    }
}
Also used : TableSchema(com.google.api.services.bigquery.model.TableSchema) JsonSchemaToTableSchema(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ArrayList(java.util.ArrayList) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) TableRow(com.google.api.services.bigquery.model.TableRow) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 7 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.

the class BigQueryTableRowIterator method getTypedCellValue.

/**
   * Converts one cell value returned by the BigQuery API into the representation produced by
   * BigQuery's export-to-GCS and parallel read, the efficient parallel implementation used for
   * batch jobs executed on the Beam Runners that perform initial splitting.
   *
   * <p>Mapping from BigQuery schema to Java types:
   *
   * <ul>
   *   <li>Null values become {@code null}.
   *   <li>Repeated fields become a {@code List} of converted elements.
   *   <li>Record columns become {@link TableRow} objects.
   *   <li>{@code BOOLEAN} columns become Java {@code Boolean} objects (JSON booleans).
   *   <li>{@code FLOAT} columns become Java {@code Double} objects (JSON floats).
   *   <li>{@code TIMESTAMP} columns become {@code String} objects formatted as
   *       {@code yyyy-MM-dd HH:mm:ss[.SSSSSS] UTC}, where the {@code .SSSSSS} part has no
   *       trailing zeros and is 1 to 6 digits long.
   *   <li>All remaining atomic types stay {@code String}.
   * </ul>
   *
   * <p>Integers are therefore kept as strings, matching BigQuery's exported JSON format.
   *
   * <p>Values end up in the {@link TableRow} as {"field name": value} pairs and are not
   * accessible through the {@link TableRow#getF} function.
   */
@Nullable
private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
    if (Data.isNull(v)) {
        return null;
    }

    boolean repeated = Objects.equals(fieldSchema.getMode(), "REPEATED");
    if (repeated) {
        // Convert every element with a copy of this schema whose mode is REQUIRED.
        TableFieldSchema perElementSchema = fieldSchema.clone().setMode("REQUIRED");
        @SuppressWarnings("unchecked") List<Map<String, Object>> cells = (List<Map<String, Object>>) v;
        ImmutableList.Builder<Object> converted = ImmutableList.builder();
        for (Map<String, Object> cell : cells) {
            converted.add(getTypedCellValue(perElementSchema, cell.get("v")));
        }
        return converted.build();
    }

    // The type is read only after the REPEATED branch, as in the if-chain this replaces.
    switch (fieldSchema.getType()) {
        case "RECORD": {
            @SuppressWarnings("unchecked") Map<String, Object> record = (Map<String, Object>) v;
            return getTypedTableRow(fieldSchema.getFields(), record);
        }
        case "FLOAT":
            return Double.parseDouble((String) v);
        case "BOOLEAN":
            return Boolean.parseBoolean((String) v);
        case "TIMESTAMP":
            return BigQueryAvroUtils.formatTimestamp((String) v);
        default:
            // Returned unchanged: 1. String, 2. base64 encoded BYTES, 3. DATE, DATETIME, TIME strings.
            return v;
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) TableDataList(com.google.api.services.bigquery.model.TableDataList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Map(java.util.Map) Nullable(javax.annotation.Nullable)

Example 8 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project google-cloud-java by GoogleCloudPlatform.

the class Field method toPb.

/**
 * Builds the API-model {@link TableFieldSchema} for this field.
 *
 * <p>Name and type are always set; mode, description and nested fields are set only when the
 * corresponding value is non-null.
 */
TableFieldSchema toPb() {
    TableFieldSchema pb = new TableFieldSchema()
        .setName(name)
        .setType(type.getValue().name());
    if (mode != null) {
        pb.setMode(mode);
    }
    if (description != null) {
        pb.setDescription(description);
    }
    if (getFields() != null) {
        // Nested fields are converted with the same field-to-pb function.
        pb.setFields(Lists.transform(getFields(), TO_PB_FUNCTION));
    }
    return pb;
}
Also used : TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 9 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project components by Talend.

the class BigQueryAvroRegistry method guessBigQuerySchema.

/**
 * Derives a BigQuery {@link TableSchema} from an Avro record schema by converting each Avro
 * field with {@code tryArrayFieldSchema}.
 *
 * @param schema the Avro schema whose fields are converted
 * @return the resulting table schema, or {@code null} when the Avro schema has no fields
 */
public TableSchema guessBigQuerySchema(org.apache.avro.Schema schema) {
    List<org.apache.avro.Schema.Field> avroFields = schema.getFields();
    if (avroFields.isEmpty()) {
        return null;
    }
    List<TableFieldSchema> columns = new ArrayList<>();
    for (org.apache.avro.Schema.Field avroField : avroFields) {
        TableFieldSchema column = tryArrayFieldSchema(avroField);
        columns.add(column);
    }
    TableSchema result = new TableSchema();
    return result.setFields(columns);
}
Also used : Field(com.google.cloud.bigquery.Field) TableSchema(com.google.api.services.bigquery.model.TableSchema) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(com.google.cloud.bigquery.Schema) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Example 10 with TableFieldSchema

use of com.google.api.services.bigquery.model.TableFieldSchema in project components by Talend.

the class BigQueryAvroRegistry method tryArrayFieldSchema.

/**
 * Starts the BigQuery field definition for one Avro field: sets the name, sets the mode to
 * {@code REQUIRED_MODE} when the Avro schema is not nullable, and, for Avro arrays, sets the
 * mode to {@code REPEATED_MODE} and continues with the array's element type. The remaining
 * conversion is delegated to {@code tryFieldSchema}.
 */
private TableFieldSchema tryArrayFieldSchema(org.apache.avro.Schema.Field field) {
    org.apache.avro.Schema declaredSchema = field.schema();
    TableFieldSchema column = new TableFieldSchema().setName(field.name());
    if (!AvroUtils.isNullable(declaredSchema)) {
        column = column.setMode(REQUIRED_MODE);
    }

    org.apache.avro.Schema valueSchema = AvroUtils.unwrapIfNullable(declaredSchema);
    boolean isArray = valueSchema.getType() == org.apache.avro.Schema.Type.ARRAY;
    if (!isArray) {
        return tryFieldSchema(column, valueSchema);
    }
    // An array replaces whatever mode was set above with REPEATED_MODE.
    return tryFieldSchema(column.setMode(REPEATED_MODE), valueSchema.getElementType());
}
Also used : TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema)

Aggregations

TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 80; TableSchema (com.google.api.services.bigquery.model.TableSchema): 71; TableRow (com.google.api.services.bigquery.model.TableRow): 56; Test (org.junit.Test): 45; Table (com.google.api.services.bigquery.model.Table): 25; TableReference (com.google.api.services.bigquery.model.TableReference): 23; ArrayList (java.util.ArrayList): 17; BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 16; List (java.util.List): 15; Map (java.util.Map): 15; PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 14; TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 13; Pipeline (org.apache.beam.sdk.Pipeline): 12; ByteString (com.google.protobuf.ByteString): 10; JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema): 10; Write (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write): 10; Method (org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method): 10; BigQueryResourceNaming.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryResourceNaming.createTempTableReference): 9; FakeBigQueryServices (org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices): 9; ErrorProto (com.google.api.services.bigquery.model.ErrorProto): 8