use of in project beam by apache.
the class BigQueryIOTest method testWriteWithDynamicTables.
/**
 * Writes integers through {@code BigQueryIO.write()} using a per-element table function and
 * then checks, for each of the five {@code table-id-N} tables, the stored schema and rows.
 *
 * <p>NOTE(review): this excerpt appears to have lost lines (closing braces, the body of the
 * loop that fills {@code inserts}, the tail of the write transform chain and the pipeline
 * execution) - confirm against the full file before relying on these comments.
 *
 * @param streaming when true, the input collection is marked as unbounded before the write
 * @throws Exception declared by the signature; the visible code does not show what is thrown
 */
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
// Fake services with a pre-created dataset "dataset-id" in project "project-id".
FakeDatasetService datasetService = new FakeDatasetService();
datasetService.createDataset("project-id", "dataset-id", "", "");
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices().withDatasetService(datasetService).withJobService(new FakeJobService());
// Input elements for the pipeline.
// NOTE(review): the loop body is not visible here; the final assertions expect the values
// 0..9, so presumably each i is added to inserts - confirm.
List<Integer> inserts = new ArrayList<>();
for (int i = 0; i < 10; i++) {
// Create a windowing strategy that puts the input into five different windows depending on
// record value.
WindowFn<Integer, PartitionedGlobalWindow> windowFn = new PartitionedGlobalWindows(new SerializableFunction<Integer, String>() {
public String apply(Integer i) {
// The partition key is the element modulo 5, rendered as a string.
return Integer.toString(i % 5);
// Lookup tables keyed by partition number (targetTables) and by table spec (schemas).
final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
Map<String, String> schemas = Maps.newHashMap();
for (int i = 0; i < 5; i++) {
TableDestination destination = new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
targetTables.put(i, destination);
// Give each target table its own schema: the shared "name" and "number" fields plus a
// table-specific "custom_<i>" field, stored as a JSON string keyed by table spec.
schemas.put(destination.getTableSpec(), BigQueryHelpers.toJsonString(new TableSchema().setFields(ImmutableList.of(new TableFieldSchema().setName("name").setType("STRING"), new TableFieldSchema().setName("number").setType("INTEGER"), new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
// Routes each element to the destination registered for (value % 5).
SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction = new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
public TableDestination apply(ValueInSingleWindow<Integer> input) {
PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
// Check that we can access the element as well here and that it matches the window.
checkArgument(window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
return targetTables.get(input.getValue() % 5);
Pipeline p = TestPipeline.create(bqOptions);
PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
if (streaming) {
// Mark the collection as unbounded when the streaming variant of the test is requested.
input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
// Map view over the table-spec -> JSON-schema entries.
// NOTE(review): the place where schemasView is consumed is not visible in this excerpt.
PCollectionView<Map<String, String>> schemasView = p.apply("CreateSchemaMap", Create.of(schemas)).apply("ViewSchemaAsMap", View.<String, String>asMap());
// Window the input with windowFn, then write with the dynamic table function; each integer
// becomes a row with "name" = "number<i>" and "number" = i.
input.apply(Window.<Integer>into(windowFn)).apply(BigQueryIO.<Integer>write().to(tableFunction).withFormatFunction(new SerializableFunction<Integer, TableRow>() {
public TableRow apply(Integer i) {
return new TableRow().set("name", "number" + i).set("number", i);
// NOTE(review): the rest of the write configuration and the pipeline run are not visible here.
for (int i = 0; i < 5; ++i) {
String tableId = String.format("table-id-%d", i);
String tableSpec = String.format("project-id:dataset-id.%s", tableId);
// Verify that table was created with the correct schema.
assertThat(BigQueryHelpers.toJsonString(datasetService.getTable(new TableReference().setProjectId("project-id").setDatasetId("dataset-id").setTableId(tableId)).getSchema()), equalTo(schemas.get(tableSpec)));
// Verify that the table has the expected contents: exactly the rows for i and i + 5, in any
// order.
assertThat(datasetService.getAllRows("project-id", "dataset-id", tableId), containsInAnyOrder(new TableRow().set("name", String.format("number%d", i)).set("number", i), new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
use of in project beam by apache.
the class BigQueryTableRowIterator method getTypedCellValue.
/**
 * Adjusts a field returned from the BigQuery API to match what we will receive when running
 * BigQuery's export-to-GCS and parallel read, which is the efficient parallel implementation
 * used for batch jobs executed on the Beam Runners that perform initial splitting.
 *
 * <p>The following is the relationship between BigQuery schema and Java types:
 *
 * <ul>
 *   <li>Nulls are {@code null}.
 *   <li>Repeated fields are {@code List} of objects.
 *   <li>Record columns are {@link TableRow} objects.
 *   <li>{@code BOOLEAN} columns are JSON booleans, hence Java {@code Boolean} objects.
 *   <li>{@code FLOAT} columns are JSON floats, hence Java {@code Double} objects.
 *   <li>{@code TIMESTAMP} columns are {@code String} objects that are of the format
 *       {@code yyyy-MM-dd HH:mm:ss[.SSSSSS] UTC}, where the {@code .SSSSSS} has no trailing
 *       zeros and can be 1 to 6 digits long.
 *   <li>Every other atomic type is a {@code String}.
 * </ul>
 *
 * <p>Note that integers are encoded as strings to match BigQuery's exported JSON format.
 *
 * <p>Finally, values are stored in the {@link TableRow} as {"field name": value} pairs
 * and are not accessible through the {@link TableRow#getF} function.
 */
private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
// Converts the raw cell value v into the Java representation selected by fieldSchema's mode
// and type. NOTE(review): closing braces are missing from this excerpt; the branch boundaries
// described below are inferred from statement order - confirm against the full file.
// Cells that Data.isNull reports as null become Java null.
if (Data.isNull(v)) {
return null;
// Repeated field: v is treated as a list of maps, and each map's "v" entry is converted
// recursively. The element schema is a clone with mode "REQUIRED", so the recursive call does
// not take this branch again.
if (Objects.equals(fieldSchema.getMode(), "REPEATED")) {
TableFieldSchema elementSchema = fieldSchema.clone().setMode("REQUIRED");
@SuppressWarnings("unchecked") List<Map<String, Object>> rawCells = (List<Map<String, Object>>) v;
ImmutableList.Builder<Object> values = ImmutableList.builder();
for (Map<String, Object> element : rawCells) {
values.add(getTypedCellValue(elementSchema, element.get("v")));
// NOTE(review): the statement returning the collected values is not visible here.
// Record field: v is cast to a map and converted by getTypedTableRow using the nested fields.
if (fieldSchema.getType().equals("RECORD")) {
@SuppressWarnings("unchecked") Map<String, Object> typedV = (Map<String, Object>) v;
return getTypedTableRow(fieldSchema.getFields(), typedV);
// FLOAT: the value is cast to String and parsed into a Double.
if (fieldSchema.getType().equals("FLOAT")) {
return Double.parseDouble((String) v);
// BOOLEAN: the value is cast to String and parsed into a Boolean.
if (fieldSchema.getType().equals("BOOLEAN")) {
return Boolean.parseBoolean((String) v);
// TIMESTAMP: the String value is reformatted by BigQueryAvroUtils.formatTimestamp.
if (fieldSchema.getType().equals("TIMESTAMP")) {
return BigQueryAvroUtils.formatTimestamp((String) v);
// Every remaining type is returned unchanged:
// 1. String, 2. base64 encoded BYTES, 3. DATE, DATETIME, TIME strings.
return v;
use of in project google-cloud-java by GoogleCloudPlatform.
the class Field method toPb.
// Builds and returns a new TableFieldSchema for this field.
// NOTE(review): the bodies of the null-checks below are missing from this excerpt; presumably
// each one copies the corresponding property onto fieldSchemaPb - confirm against the full file.
TableFieldSchema toPb() {
TableFieldSchema fieldSchemaPb = new TableFieldSchema();
// Optional properties are only handled when they are non-null.
if (mode != null) {
if (description != null) {
if (getFields() != null) {
// Nested fields are mapped through TO_PB_FUNCTION.
// NOTE(review): the statement that uses fieldsPb is not visible here.
List<TableFieldSchema> fieldsPb = Lists.transform(getFields(), TO_PB_FUNCTION);
return fieldSchemaPb;
use of in project components by Talend.
the class BigQueryAvroRegistry method guessBigQuerySchema.
/**
 * Builds a {@code TableSchema} from the fields of the given Avro schema.
 *
 * <p>NOTE(review): the loop body that fills {@code bqFields} is missing from this excerpt, so
 * the per-field conversion cannot be described from here - confirm against the full file.
 *
 * @param schema Avro schema whose fields are read via {@code getFields()}
 * @return {@code null} when the schema has no fields, otherwise a new {@code TableSchema}
 *     whose fields are set from {@code bqFields}
 */
public TableSchema guessBigQuerySchema(org.apache.avro.Schema schema) {
List<org.apache.avro.Schema.Field> fields = schema.getFields();
// A schema without fields yields null rather than an empty TableSchema.
if (fields.size() == 0) {
return null;
List<TableFieldSchema> bqFields = new ArrayList<>();
for (org.apache.avro.Schema.Field field : fields) {
return new TableSchema().setFields(bqFields);
use of in project components by Talend.
the class BigQueryAvroRegistry method tryArrayFieldSchema.
// Derives a TableFieldSchema for one Avro field, giving Avro arrays the repeated mode.
// NOTE(review): closing braces are missing from this excerpt; confirm branch boundaries
// against the full file.
private TableFieldSchema tryArrayFieldSchema(org.apache.avro.Schema.Field field) {
// NOTE(review): the right-hand side of this assignment is missing in this excerpt, so the
// line does not compile as shown; presumably it reads the Avro field's name - confirm.
String fieldName =;
TableFieldSchema tableFieldSchema = new TableFieldSchema().setName(fieldName);
// Fields whose Avro schema is not nullable get REQUIRED_MODE.
boolean nullable = AvroUtils.isNullable(field.schema());
if (!nullable) {
tableFieldSchema = tableFieldSchema.setMode(REQUIRED_MODE);
// The remaining checks work on the schema returned by AvroUtils.unwrapIfNullable.
org.apache.avro.Schema fieldSchema = AvroUtils.unwrapIfNullable(field.schema());
// Arrays switch to REPEATED_MODE (replacing any mode set above) and delegate to
// tryFieldSchema with the array's element type.
if (fieldSchema.getType() == org.apache.avro.Schema.Type.ARRAY) {
return tryFieldSchema(tableFieldSchema.setMode(REPEATED_MODE), fieldSchema.getElementType());
// Non-array types delegate with the unwrapped schema itself.
return tryFieldSchema(tableFieldSchema, fieldSchema);