use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryTableRowIterator method getTypedTableRow.
/**
 * Builds a typed {@link TableRow} from a row returned by the BigQuery JSON API.
 *
 * <p>The raw row arrives as a {@code Map<String, Object>} whose {@code "f"} entry holds the list
 * of cells; each cell in turn exposes its value under {@code "v"}. Every cell value is converted
 * with {@link #getTypedCellValue(TableFieldSchema, Object)} using the field schema at the same
 * position, and the result is stored in the returned row under the field's name. Cells whose
 * converted value is {@code null} are left out of the returned row.
 *
 * @param fields the field schemas, positionally aligned with the cells of {@code rawRow}
 * @param rawRow the row to convert; if it already is a {@link TableRow} it is reused (and its
 *     cell list is cleared), otherwise a new {@link TableRow} is created
 * @return the row with one entry per non-null cell, keyed by field name
 */
private TableRow getTypedTableRow(List<TableFieldSchema> fields, Map<String, Object> rawRow) {
  final TableRow row;
  final List<? extends Map<String, Object>> cells;
  if (!(rawRow instanceof TableRow)) {
    // A plain Map<String, Object>: read the cells through Map.get("f") rather than
    // TableRow.getF(). Likewise, a cell that is a plain Map is read below through
    // Map.get("v") rather than TableCell.getV() to get its value.
    row = new TableRow();
    @SuppressWarnings("unchecked")
    List<? extends Map<String, Object>> rawCells =
        (List<? extends Map<String, Object>>) rawRow.get("f");
    cells = rawCells;
  } else {
    // Already a TableRow, so its cells are TableCell objects; reuse the row itself and only
    // pull the cells out for the per-cell processing below.
    row = (TableRow) rawRow;
    cells = row.getF();
    // Drop the cells from the row so that row.getF() returns null afterwards, matching rows
    // produced by the BigQuery export API used on the service.
    row.setF(null);
  }

  int cellCount = cells.size();
  int fieldCount = fields.size();
  checkState(
      cellCount == fieldCount,
      "Expected that the row has the same number of cells %s as fields in the schema %s",
      cellCount,
      fieldCount);

  // Walk cells and field schemas in lock-step; the size check above guarantees the schema
  // iterator has an element for every cell.
  Iterator<TableFieldSchema> schemaIt = fields.iterator();
  for (Map<String, Object> cell : cells) {
    TableFieldSchema fieldSchema = schemaIt.next();
    // Normalize the raw cell value to the Java type implied by the field's schema.
    Object typedValue = getTypedCellValue(fieldSchema, cell.get("v"));
    String columnName = fieldSchema.getName();
    checkArgument(
        !RESERVED_FIELD_NAMES.contains(columnName),
        "BigQueryIO does not support records with columns named %s",
        columnName);
    // Columns with null values are intentionally omitted from the row.
    if (typedValue != null) {
      row.set(columnName, typedValue);
    }
  }
  return row;
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryIOIT method testWrite.
/**
 * Runs a pipeline that reads from a synthetic bounded source and writes the record values to
 * BigQuery with the given write transform, then hands the result to
 * {@code extractAndPublishTime} under {@code metricName}.
 *
 * @param writeIO the base BigQuery write transform to configure and apply
 * @param metricName the name passed to the time monitor and to {@code extractAndPublishTime}
 */
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);
  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());

  // Single-column schema: one BYTES field named "data".
  TableSchema bytesSchema =
      new TableSchema()
          .setFields(
              Collections.singletonList(new TableFieldSchema().setName("data").setType("BYTES")));

  BigQueryIO.Write<byte[]> configuredWrite =
      writeIO
          .to(tableQualifier)
          .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
          .withMethod(method)
          .withSchema(bytesSchema);

  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply("Write to BQ", configuredWrite);

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  extractAndPublishTime(result, metricName);
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryUtilsTest method testToTableSchema_map.
/**
 * Checks that converting {@code MAP_MAP_TYPE} yields a single repeated STRUCT field named
 * "map" whose nested fields are the map key and map value.
 */
@Test
public void testToTableSchema_map() {
  TableSchema converted = toTableSchema(MAP_MAP_TYPE);

  // Exactly one top-level field is expected.
  assertThat(converted.getFields().size(), equalTo(1));

  TableFieldSchema mapField = converted.getFields().get(0);
  assertThat(mapField.getName(), equalTo("map"));
  assertThat(mapField.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
  assertThat(mapField.getMode(), equalTo(Mode.REPEATED.toString()));
  assertThat(mapField.getFields(), containsInAnyOrder(MAP_KEY, MAP_VALUE));
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryUtilsTest method testToTableSchema_array_row.
/**
 * Checks that converting {@code ARRAY_ROW_TYPE} yields a single repeated STRUCT field named
 * "rows" whose nested fields are the expected row fields, in any order.
 */
@Test
public void testToTableSchema_array_row() {
  TableSchema converted = toTableSchema(ARRAY_ROW_TYPE);

  // Exactly one top-level field is expected.
  assertThat(converted.getFields().size(), equalTo(1));

  TableFieldSchema rowsField = converted.getFields().get(0);
  assertThat(rowsField.getName(), equalTo("rows"));
  assertThat(rowsField.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
  assertThat(rowsField.getMode(), equalTo(Mode.REPEATED.toString()));
  assertThat(
      rowsField.getFields(),
      containsInAnyOrder(
          ID,
          VALUE,
          NAME,
          TIMESTAMP_VARIANT1,
          TIMESTAMP_VARIANT2,
          TIMESTAMP_VARIANT3,
          TIMESTAMP_VARIANT4,
          DATETIME,
          DATETIME_0MS,
          DATETIME_0S_NS,
          DATETIME_0S_0NS,
          DATE,
          TIME,
          TIME_0MS,
          TIME_0S_NS,
          TIME_0S_0NS,
          VALID,
          BINARY,
          NUMERIC,
          BOOLEAN,
          LONG,
          DOUBLE));
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryHllSketchCompatibilityIT method prepareDatasetAndDataTables.
/**
 * Creates the test dataset and two data tables sharing a single-field schema: one table that is
 * pre-populated with {@code TEST_DATA} and one that is left empty.
 */
@BeforeClass
public static void prepareDatasetAndDataTables() throws Exception {
  BIGQUERY_CLIENT.createNewDataset(PROJECT_ID, DATASET_ID);

  // Both tables use the same one-column schema.
  TableFieldSchema dataField =
      new TableFieldSchema().setName(DATA_FIELD_NAME).setType(DATA_FIELD_TYPE);
  TableSchema dataTableSchema = new TableSchema().setFields(Collections.singletonList(dataField));

  // Non-empty table: create it, then insert one row per element of TEST_DATA.
  TableReference nonEmptyRef =
      new TableReference()
          .setProjectId(PROJECT_ID)
          .setDatasetId(DATASET_ID)
          .setTableId(DATA_TABLE_ID_NON_EMPTY);
  Table dataTableNonEmpty = new Table().setSchema(dataTableSchema).setTableReference(nonEmptyRef);
  BIGQUERY_CLIENT.createNewTable(PROJECT_ID, DATASET_ID, dataTableNonEmpty);

  List<Map<String, Object>> rows =
      TEST_DATA.stream()
          .map(value -> Collections.<String, Object>singletonMap(DATA_FIELD_NAME, value))
          .collect(Collectors.toList());
  BIGQUERY_CLIENT.insertDataToTable(PROJECT_ID, DATASET_ID, DATA_TABLE_ID_NON_EMPTY, rows);

  // Empty table: same schema, no rows inserted.
  TableReference emptyRef =
      new TableReference()
          .setProjectId(PROJECT_ID)
          .setDatasetId(DATASET_ID)
          .setTableId(DATA_TABLE_ID_EMPTY);
  Table dataTableEmpty = new Table().setSchema(dataTableSchema).setTableReference(emptyRef);
  BIGQUERY_CLIENT.createNewTable(PROJECT_ID, DATASET_ID, dataTableEmpty);
}
Aggregations