use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.
the class BigQueryUtils method fromBeamField.
/**
 * Converts a single Beam field value into the object that is emitted for BigQuery.
 *
 * <p>Collections become lists of converted elements, maps become lists of key/value {@code
 * TableRow}s, nested rows are converted with {@code toTableRow}, and every other type is
 * rendered as a string.
 *
 * @param fieldType the Beam type describing {@code fieldValue}
 * @param fieldValue the value to convert; may be {@code null} only when the type is nullable
 * @return the converted value, or {@code null} when {@code fieldValue} is {@code null}
 * @throws IllegalArgumentException if {@code fieldValue} is {@code null} and the type is not
 *     nullable
 */
@Nullable
private static Object fromBeamField(FieldType fieldType, Object fieldValue) {
  if (fieldValue == null) {
    if (fieldType.getNullable()) {
      return null;
    }
    throw new IllegalArgumentException("Field is not nullable.");
  }

  switch (fieldType.getTypeName()) {
    case ARRAY:
    case ITERABLE: {
      // Convert every element recursively using the collection's element type.
      FieldType elementType = fieldType.getCollectionElementType();
      Iterable<?> elements = (Iterable<?>) fieldValue;
      List<Object> converted = Lists.newArrayListWithCapacity(Iterables.size(elements));
      for (Object element : elements) {
        converted.add(fromBeamField(elementType, element));
      }
      return converted;
    }

    case MAP: {
      // Each map entry becomes one TableRow holding the converted key and value.
      FieldType keyType = fieldType.getMapKeyType();
      FieldType valueType = fieldType.getMapValueType();
      Map<?, ?> entries = (Map<?, ?>) fieldValue;
      List<Object> entryRows = Lists.newArrayListWithCapacity(entries.size());
      for (Map.Entry<?, ?> entry : entries.entrySet()) {
        TableRow entryRow = new TableRow();
        entryRow.set(BIGQUERY_MAP_KEY_FIELD_NAME, fromBeamField(keyType, entry.getKey()));
        entryRow.set(BIGQUERY_MAP_VALUE_FIELD_NAME, fromBeamField(valueType, entry.getValue()));
        entryRows.add(entryRow);
      }
      return entryRows;
    }

    case ROW:
      return toTableRow((Row) fieldValue);

    case DATETIME:
      return ((Instant) fieldValue)
          .toDateTime(DateTimeZone.UTC)
          .toString(BIGQUERY_TIMESTAMP_PRINTER);

    case BYTES:
      return BaseEncoding.base64().encode((byte[]) fieldValue);

    case INT16:
    case INT32:
    case INT64:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case BOOLEAN:
    case DECIMAL:
      return fieldValue.toString();

    case LOGICAL_TYPE: {
      // For the JSON formats of DATE/DATETIME/TIME/TIMESTAMP types that BigQuery accepts, see
      // https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json#details_of_loading_json_data
      String identifier = fieldType.getLogicalType().getIdentifier();

      if (SqlTypes.DATE.getIdentifier().equals(identifier)) {
        return fieldValue.toString();
      }

      if (SqlTypes.TIME.getIdentifier().equals(identifier)) {
        // LocalTime.toString() omits the seconds when they are zero (see
        // https://docs.oracle.com/javase/8/docs/api/java/time/LocalTime.html#toString--),
        // whereas BigQuery TIME requires them
        // (https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type).
        // A formatter is therefore used; the fractional part is only emitted when present.
        LocalTime time = (LocalTime) fieldValue;
        // Suppression is justified because seconds are always outputted.
        @SuppressWarnings("JavaLocalTimeGetNano")
        boolean wholeSeconds = time.getNano() == 0;
        return (wholeSeconds ? ISO_LOCAL_TIME : BIGQUERY_TIME_FORMATTER).format(time);
      }

      if (SqlTypes.DATETIME.getIdentifier().equals(identifier)) {
        // Same rationale as SqlTypes.TIME.
        LocalDateTime dateTime = (LocalDateTime) fieldValue;
        @SuppressWarnings("JavaLocalDateTimeGetNano")
        boolean wholeSeconds = dateTime.getNano() == 0;
        return (wholeSeconds ? ISO_LOCAL_DATE_TIME : BIGQUERY_DATETIME_FORMATTER)
            .format(dateTime);
      }

      if ("Enum".equals(identifier)) {
        return fieldType
            .getLogicalType(EnumerationType.class)
            .toString((EnumerationType.Value) fieldValue);
      }

      // Any other logical type is rendered the same way as the default case.
      return fieldValue.toString();
    }

    default:
      return fieldValue.toString();
  }
}
use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.
the class BigQueryUtils method fromTableFieldSchema.
/**
 * Builds a Beam {@code Schema} from a list of BigQuery field schemas.
 *
 * <p>A field whose mode is {@code REPEATED} is wrapped in an array type unless its converted
 * type is already a map type. A field is nullable when its mode is absent or {@code NULLABLE}.
 * Non-empty descriptions are carried over.
 *
 * @param tableFieldSchemas the BigQuery fields to convert, in order
 * @param options conversion options forwarded to {@code fromTableFieldSchemaType}
 * @return the resulting Beam schema
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas, SchemaConversionOptions options) {
  Schema.Builder builder = Schema.builder();

  for (TableFieldSchema bqField : tableFieldSchemas) {
    FieldType beamType =
        fromTableFieldSchemaType(bqField.getType(), bqField.getFields(), options);

    Optional<Mode> declaredMode = Optional.ofNullable(bqField.getMode()).map(Mode::valueOf);

    boolean repeated = declaredMode.map(m -> m == Mode.REPEATED).orElse(false);
    if (repeated && !beamType.getTypeName().isMapType()) {
      beamType = FieldType.array(beamType);
    }

    // An undefined mode, as well as an explicit NULLABLE mode, means the field is nullable.
    boolean nullable = declaredMode.map(m -> m == Mode.NULLABLE).orElse(true);

    Field beamField = Field.of(bqField.getName(), beamType).withNullable(nullable);

    String description = bqField.getDescription();
    if (description != null && !description.isEmpty()) {
      beamField = beamField.withDescription(description);
    }

    builder.addField(beamField);
  }

  return builder.build();
}
use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.
the class BigQueryUtils method toBeamValue.
/**
 * Converts a JSON-decoded BigQuery value into the Beam value for {@code fieldType}.
 *
 * <ul>
 *   <li>{@code String}, {@code Number} and {@code Boolean} values are converted through their
 *       string form, using {@code JSON_VALUE_PARSERS} or the DATETIME/DATE/TIME logical-type
 *       parsers.
 *   <li>{@code List} values are treated as lists of maps whose {@code "v"} entry holds the
 *       element, each converted with the collection element type.
 *   <li>{@code Map} values are copied into a {@code TableRow} and converted with {@code
 *       toBeamRow} using the row schema of {@code fieldType}.
 * </ul>
 *
 * @param fieldType the target Beam type
 * @param jsonBQValue the BigQuery value to convert
 * @return the converted Beam value
 * @throws UnsupportedOperationException if no conversion applies, including when {@code
 *     jsonBQValue} is {@code null}
 */
private static Object toBeamValue(FieldType fieldType, Object jsonBQValue) {
  if (jsonBQValue instanceof String
      || jsonBQValue instanceof Number
      || jsonBQValue instanceof Boolean) {
    String jsonBQString = jsonBQValue.toString();
    if (JSON_VALUE_PARSERS.containsKey(fieldType.getTypeName())) {
      return JSON_VALUE_PARSERS.get(fieldType.getTypeName()).apply(jsonBQString);
    } else if (fieldType.isLogicalType(SqlTypes.DATETIME.getIdentifier())) {
      return LocalDateTime.parse(jsonBQString, BIGQUERY_DATETIME_FORMATTER);
    } else if (fieldType.isLogicalType(SqlTypes.DATE.getIdentifier())) {
      return LocalDate.parse(jsonBQString);
    } else if (fieldType.isLogicalType(SqlTypes.TIME.getIdentifier())) {
      return LocalTime.parse(jsonBQString);
    }
    // A scalar with no matching parser falls through to the checks below and, failing those,
    // to the UnsupportedOperationException at the end.
  }

  if (jsonBQValue instanceof List) {
    // Each list item is a map wrapping the actual element under the "v" key.
    return ((List<Object>) jsonBQValue)
        .stream()
        .map(v -> ((Map<String, Object>) v).get("v"))
        .map(v -> toBeamValue(fieldType.getCollectionElementType(), v))
        .collect(toList());
  }

  if (jsonBQValue instanceof Map) {
    TableRow tr = new TableRow();
    tr.putAll((Map<String, Object>) jsonBQValue);
    return toBeamRow(fieldType.getRowSchema(), tr);
  }

  // Describe the source type null-safely: a null value must surface as the intended
  // UnsupportedOperationException rather than as a NullPointerException from getClass().
  String sourceType = (jsonBQValue == null) ? "null" : jsonBQValue.getClass().toString();
  throw new UnsupportedOperationException(
      "Converting BigQuery type '" + sourceType + "' to '" + fieldType + "' is not supported");
}
use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.
the class BigQueryUtils method toTableFieldSchema.
/**
 * Converts every field of a Beam {@code Schema} into a BigQuery {@code TableFieldSchema}.
 *
 * <p>Non-nullable fields are marked {@code REQUIRED}; collection and map fields are marked
 * {@code REPEATED}. Row fields (including collection elements that are rows) get their nested
 * fields converted recursively, and a map is represented as a row with a key field and a value
 * field.
 *
 * @param schema the Beam schema to convert
 * @return the BigQuery field schemas, in the order of the Beam fields
 * @throws IllegalArgumentException if a collection's element type is itself a collection or a
 *     map
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
  List<TableFieldSchema> converted = new ArrayList<>(schema.getFieldCount());

  for (Field beamField : schema.getFields()) {
    FieldType beamType = beamField.getType();
    TableFieldSchema bqField = new TableFieldSchema().setName(beamField.getName());

    String description = beamField.getDescription();
    if (description != null && !description.isEmpty()) {
      bqField.setDescription(description);
    }

    if (!beamType.getNullable()) {
      bqField.setMode(Mode.REQUIRED.toString());
    }

    if (beamType.getTypeName().isCollectionType()) {
      // From here on the element type drives the conversion; the field itself is REPEATED.
      beamType = beamType.getCollectionElementType();
      TypeName elementTypeName = beamType.getTypeName();
      if (elementTypeName.isCollectionType() || elementTypeName.isMapType()) {
        throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
      }
      bqField.setMode(Mode.REPEATED.toString());
    }

    if (TypeName.ROW == beamType.getTypeName()) {
      bqField.setFields(toTableFieldSchema(beamType.getRowSchema()));
    } else if (TypeName.MAP == beamType.getTypeName()) {
      // A map is modelled as a repeated row made of a key field and a value field.
      Schema entrySchema =
          Schema.builder()
              .addField(BIGQUERY_MAP_KEY_FIELD_NAME, beamType.getMapKeyType())
              .addField(BIGQUERY_MAP_VALUE_FIELD_NAME, beamType.getMapValueType())
              .build();
      beamType = FieldType.row(entrySchema);
      bqField.setFields(toTableFieldSchema(entrySchema));
      bqField.setMode(Mode.REPEATED.toString());
    }

    bqField.setType(toStandardSQLTypeName(beamType).toString());
    converted.add(bqField);
  }

  return converted;
}
use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.
the class BeamRowToStorageApiProto method fieldDescriptorFromBeamField.
/**
 * Builds the proto field descriptor for a Beam schema field.
 *
 * <p>The descriptor name is the lower-cased Beam field name and its number is {@code
 * fieldNumber}. Row fields produce a nested message descriptor, which is appended to {@code
 * nestedTypes}. Array and iterable fields are described by their element type and labelled
 * repeated. All other fields are labelled optional or required according to the field's
 * nullability.
 *
 * @param field the Beam field to describe
 * @param fieldNumber the proto field number to assign
 * @param nestedTypes receives the descriptors generated for nested row types
 * @return a builder for the field descriptor
 * @throws RuntimeException if the field is a map, uses an unsupported logical or primitive
 *     type, or is missing its row schema, element type or logical type; the nested-array
 *     precondition check may also fail
 */
private static FieldDescriptorProto.Builder fieldDescriptorFromBeamField(Field field, int fieldNumber, List<DescriptorProto> nestedTypes) {
  FieldDescriptorProto.Builder fieldDescriptorBuilder = FieldDescriptorProto.newBuilder();
  // Lower-case with Locale.ROOT so the result does not depend on the JVM default locale
  // (e.g. under a Turkish locale "ID" would otherwise become "ıd").
  fieldDescriptorBuilder =
      fieldDescriptorBuilder.setName(field.getName().toLowerCase(java.util.Locale.ROOT));
  fieldDescriptorBuilder = fieldDescriptorBuilder.setNumber(fieldNumber);

  switch (field.getType().getTypeName()) {
    case ROW:
      @Nullable Schema rowSchema = field.getType().getRowSchema();
      if (rowSchema == null) {
        throw new RuntimeException("Unexpected null schema!");
      }
      // The nested row gets its own message descriptor, referenced here by name.
      DescriptorProto nested = descriptorSchemaFromBeamSchema(rowSchema);
      nestedTypes.add(nested);
      fieldDescriptorBuilder =
          fieldDescriptorBuilder.setType(Type.TYPE_MESSAGE).setTypeName(nested.getName());
      break;
    case ARRAY:
    case ITERABLE:
      @Nullable FieldType elementType = field.getType().getCollectionElementType();
      if (elementType == null) {
        throw new RuntimeException("Unexpected null element type!");
      }
      Preconditions.checkState(
          !Preconditions.checkNotNull(elementType.getTypeName()).isCollectionType(),
          "Nested arrays not supported by BigQuery.");
      // Describe the element type under the same name and number, then mark it repeated.
      // This returns directly, so the optional/required labelling below is skipped.
      return fieldDescriptorFromBeamField(
              Field.of(field.getName(), elementType), fieldNumber, nestedTypes)
          .setLabel(Label.LABEL_REPEATED);
    case LOGICAL_TYPE:
      @Nullable LogicalType<?, ?> logicalType = field.getType().getLogicalType();
      if (logicalType == null) {
        throw new RuntimeException("Unexpected null logical type " + field.getType());
      }
      @Nullable Type type = LOGICAL_TYPES.get(logicalType.getIdentifier());
      if (type == null) {
        throw new RuntimeException("Unsupported logical type " + field.getType());
      }
      fieldDescriptorBuilder = fieldDescriptorBuilder.setType(type);
      break;
    case MAP:
      throw new RuntimeException("Map types not supported by BigQuery.");
    default:
      @Nullable Type primitiveType = PRIMITIVE_TYPES.get(field.getType().getTypeName());
      if (primitiveType == null) {
        throw new RuntimeException("Unsupported type " + field.getType());
      }
      fieldDescriptorBuilder = fieldDescriptorBuilder.setType(primitiveType);
  }

  if (field.getType().getNullable()) {
    fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_OPTIONAL);
  } else {
    fieldDescriptorBuilder = fieldDescriptorBuilder.setLabel(Label.LABEL_REQUIRED);
  }
  return fieldDescriptorBuilder;
}
Aggregations