use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryUtils method fromTableFieldSchema.
/**
 * Converts a list of BigQuery {@link TableFieldSchema}s into a Beam {@link Schema}.
 *
 * <p>Each BigQuery field becomes one Beam field with the same name. A {@code REPEATED} field is
 * wrapped in an array type unless its converted type is already a map type. A field is nullable
 * when its mode is absent or {@code NULLABLE}. Non-empty descriptions are carried over.
 *
 * @param tableFieldSchemas the BigQuery field definitions to convert
 * @param options conversion options forwarded to {@code fromTableFieldSchemaType}
 * @return the Beam schema containing one field per input field, in iteration order
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas, SchemaConversionOptions options) {
  Schema.Builder builder = Schema.builder();
  for (TableFieldSchema source : tableFieldSchemas) {
    FieldType beamType = fromTableFieldSchemaType(source.getType(), source.getFields(), options);

    // A missing mode is represented as null; otherwise it must name a Mode constant.
    String rawMode = source.getMode();
    Mode mode = (rawMode == null) ? null : Mode.valueOf(rawMode);

    boolean repeated = mode == Mode.REPEATED;
    if (repeated && !beamType.getTypeName().isMapType()) {
      beamType = FieldType.array(beamType);
    }

    // An undefined mode and an explicit NULLABLE mode both yield a nullable field.
    boolean nullable = (mode == null) || (mode == Mode.NULLABLE);
    Field beamField = Field.of(source.getName(), beamType).withNullable(nullable);

    String description = source.getDescription();
    if (description != null && !description.isEmpty()) {
      beamField = beamField.withDescription(description);
    }
    builder.addField(beamField);
  }
  return builder.build();
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigQueryUtils method toTableFieldSchema.
/**
 * Converts a Beam {@link Schema} into the equivalent list of BigQuery {@link TableFieldSchema}s.
 *
 * <p>Non-nullable fields are marked {@code REQUIRED}. Collection fields are emitted as
 * {@code REPEATED} fields of their element type. Row fields recurse into their nested schema.
 * Map fields are emitted as {@code REPEATED} rows holding a key field and a value field.
 *
 * @param schema the Beam schema to convert
 * @return one BigQuery field definition per Beam field, in schema order
 * @throws IllegalArgumentException if a collection's element type is itself a collection or map
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
  List<TableFieldSchema> result = new ArrayList<>(schema.getFieldCount());
  for (Field beamField : schema.getFields()) {
    FieldType effectiveType = beamField.getType();
    TableFieldSchema tableField = new TableFieldSchema().setName(beamField.getName());

    String description = beamField.getDescription();
    if (description != null && !description.isEmpty()) {
      tableField.setDescription(description);
    }

    if (!effectiveType.getNullable()) {
      tableField.setMode(Mode.REQUIRED.toString());
    }

    if (effectiveType.getTypeName().isCollectionType()) {
      // Describe the element type from here on; the field itself becomes REPEATED.
      effectiveType = effectiveType.getCollectionElementType();
      TypeName elementTypeName = effectiveType.getTypeName();
      if (elementTypeName.isCollectionType() || elementTypeName.isMapType()) {
        throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
      }
      tableField.setMode(Mode.REPEATED.toString());
    }

    TypeName typeName = effectiveType.getTypeName();
    if (typeName == TypeName.ROW) {
      tableField.setFields(toTableFieldSchema(effectiveType.getRowSchema()));
    } else if (typeName == TypeName.MAP) {
      // A map is modelled as a repeated row with one key field and one value field.
      Schema entrySchema =
          Schema.builder()
              .addField(BIGQUERY_MAP_KEY_FIELD_NAME, effectiveType.getMapKeyType())
              .addField(BIGQUERY_MAP_VALUE_FIELD_NAME, effectiveType.getMapValueType())
              .build();
      effectiveType = FieldType.row(entrySchema);
      tableField.setFields(toTableFieldSchema(entrySchema));
      tableField.setMode(Mode.REPEATED.toString());
    }

    tableField.setType(toStandardSQLTypeName(effectiveType).toString());
    result.add(tableField);
  }
  return result;
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigqueryClient method getTypedTableRow.
/**
 * Normalizes a raw row against its schema, producing a {@link TableRow} keyed by field name.
 *
 * <p>The raw row's cells are read from {@code f} and each cell's value from {@code v}. Every
 * value is converted with {@code getTypedCellValue} and stored under its field name. Cells
 * whose converted value is {@code null} are omitted from the result.
 *
 * @param fields the schema fields, in the same order as the row's cells
 * @param rawRow either a {@link TableRow} (reused and returned, with its cells cleared) or a
 *     plain map holding the cell list under the key {@code "f"}
 * @return the row populated with typed values by field name
 * @throws IllegalStateException if the number of cells differs from the number of fields
 * @throws IllegalArgumentException if a field name is contained in {@code RESERVED_FIELD_NAMES}
 */
private TableRow getTypedTableRow(List<TableFieldSchema> fields, Map<String, Object> rawRow) {
  final TableRow typedRow;
  final List<? extends Map<String, Object>> rawCells;
  if (!(rawRow instanceof TableRow)) {
    // rawRow is a plain Map<String, Object>, so read its cells with Map.get("f") rather than
    // TableRow.getF(). Likewise each cell's value is read below with Map.get("v").
    typedRow = new TableRow();
    @SuppressWarnings("unchecked")
    List<? extends Map<String, Object>> uncheckedCells =
        (List<? extends Map<String, Object>>) rawRow.get("f");
    rawCells = uncheckedCells;
  } else {
    // rawRow already is a TableRow holding TableCell objects: reuse it and pull out the cells
    // for the cell-wise processing below.
    typedRow = (TableRow) rawRow;
    rawCells = typedRow.getF();
    // Clear the cells so that getF() returns null afterwards, matching the behavior of rows
    // produced by the BigQuery export API used on the service.
    typedRow.setF(null);
  }

  checkState(
      rawCells.size() == fields.size(),
      "Expected that the row has the same number of cells %s as fields in the schema %s",
      rawCells.size(),
      fields.size());

  // Walk cells and schema fields in lockstep, storing each normalized value by field name in
  // the Map<String, Object> that underlies the TableRow.
  Iterator<TableFieldSchema> schemaIt = fields.iterator();
  for (Map<String, Object> rawCell : rawCells) {
    TableFieldSchema cellSchema = schemaIt.next();

    // Convert the cell's object to the Java type that corresponds to its schema type.
    Object typedValue = getTypedCellValue(cellSchema, rawCell.get("v"));

    String columnName = cellSchema.getName();
    checkArgument(
        !RESERVED_FIELD_NAMES.contains(columnName),
        "BigQueryIO does not support records with columns named %s",
        columnName);

    // Columns with null values are intentionally left out of the row.
    if (typedValue != null) {
      typedRow.set(columnName, typedValue);
    }
  }
  return typedRow;
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class BigqueryClient method getTypedCellValue.
/**
 * Converts a raw cell value to the Java representation implied by its field schema.
 *
 * <p>Repeated fields become an immutable list of converted elements, {@code RECORD} values
 * become typed rows, {@code FLOAT} and {@code BOOLEAN} strings are parsed, and
 * {@code TIMESTAMP} values are returned as strings. Any other value is returned unchanged.
 *
 * @param fieldSchema the schema of the field the value belongs to
 * @param v the raw cell value
 * @return the converted value, or {@code null} when {@code Data.isNull(v)} holds
 */
@Nullable
private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
  if (Data.isNull(v)) {
    return null;
  }

  boolean repeated = Objects.equals(fieldSchema.getMode(), "REPEATED");
  if (repeated) {
    // Convert each element with a copy of the schema whose mode is REQUIRED, so the recursive
    // call treats the element as a single value rather than as another list.
    TableFieldSchema perElementSchema = fieldSchema.clone().setMode("REQUIRED");
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> repeatedCells = (List<Map<String, Object>>) v;
    ImmutableList.Builder<Object> typedElements = ImmutableList.builder();
    for (Map<String, Object> repeatedCell : repeatedCells) {
      Object typedElement = getTypedCellValue(perElementSchema, repeatedCell.get("v"));
      typedElements.add(typedElement);
    }
    return typedElements.build();
  }

  String bigQueryType = fieldSchema.getType();
  if ("RECORD".equals(bigQueryType)) {
    @SuppressWarnings("unchecked")
    Map<String, Object> nestedRow = (Map<String, Object>) v;
    return getTypedTableRow(fieldSchema.getFields(), nestedRow);
  } else if ("FLOAT".equals(bigQueryType)) {
    return Double.parseDouble((String) v);
  } else if ("BOOLEAN".equals(bigQueryType)) {
    return Boolean.parseBoolean((String) v);
  } else if ("TIMESTAMP".equals(bigQueryType)) {
    return (String) v;
  }

  // Everything else is passed through as-is:
  // 1. String, 2. base64 encoded BYTES, 3. DATE, DATETIME, TIME strings.
  return v;
}
use of com.google.api.services.bigquery.model.TableFieldSchema in project beam by apache.
the class TableRowToStorageApiProto method fieldDescriptorFromTableField.
/**
 * Appends a proto field descriptor for one BigQuery field to {@code descriptorBuilder}.
 *
 * <p>The proto field is named after the lower-cased BigQuery field name and numbered with
 * {@code fieldNumber}. {@code STRUCT}/{@code RECORD} fields get a nested message type built by
 * {@code descriptorSchemaFromTableFieldSchemas}; all other types are looked up in
 * {@code PRIMITIVE_TYPES}. The BigQuery mode maps to the proto label: {@code REPEATED} to
 * repeated, absent or {@code NULLABLE} to optional, anything else to required.
 *
 * @param fieldSchema the BigQuery field to describe
 * @param fieldNumber the proto field number to assign
 * @param descriptorBuilder the message descriptor that receives the field (and, for records, the
 *     nested type)
 * @throws UnsupportedOperationException if the field type is neither a record nor present in
 *     {@code PRIMITIVE_TYPES}
 */
private static void fieldDescriptorFromTableField(TableFieldSchema fieldSchema, int fieldNumber, DescriptorProto.Builder descriptorBuilder) {
  FieldDescriptorProto.Builder fieldDescriptorBuilder = FieldDescriptorProto.newBuilder();
  // Lower-case with Locale.ROOT so the generated name does not depend on the JVM's default
  // locale (e.g. under a Turkish locale "ID" would otherwise become "ıd" with a dotless i).
  fieldDescriptorBuilder =
      fieldDescriptorBuilder.setName(fieldSchema.getName().toLowerCase(java.util.Locale.ROOT));
  fieldDescriptorBuilder = fieldDescriptorBuilder.setNumber(fieldNumber);
  switch (fieldSchema.getType()) {
    case "STRUCT":
    case "RECORD":
      // Records become a nested message type that the field then references by name.
      DescriptorProto nested = descriptorSchemaFromTableFieldSchemas(fieldSchema.getFields());
      descriptorBuilder.addNestedType(nested);
      fieldDescriptorBuilder =
          fieldDescriptorBuilder.setType(Type.TYPE_MESSAGE).setTypeName(nested.getName());
      break;
    default:
      @Nullable Type type = PRIMITIVE_TYPES.get(fieldSchema.getType());
      if (type == null) {
        throw new UnsupportedOperationException(
            "Converting BigQuery type " + fieldSchema.getType() + " to Beam type is unsupported");
      }
      fieldDescriptorBuilder = fieldDescriptorBuilder.setType(type);
  }

  // Translate the BigQuery mode into the proto field label.
  Optional<Mode> fieldMode = Optional.ofNullable(fieldSchema.getMode()).map(Mode::valueOf);
  if (fieldMode.filter(m -> m == Mode.REPEATED).isPresent()) {
    fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REPEATED);
  } else if (!fieldMode.isPresent() || fieldMode.filter(m -> m == Mode.NULLABLE).isPresent()) {
    // An undefined mode is treated the same as NULLABLE.
    fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_OPTIONAL);
  } else {
    fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REQUIRED);
  }
  descriptorBuilder.addField(fieldDescriptorBuilder.build());
}
Aggregations