Use of org.apache.spark.sql.types.BinaryType in the spark-bigquery-connector project by GoogleCloudDataproc.
From the class AvroSchemaConverter, method createConverterFor.
/**
 * Builds a {@link Converter} that reads a single value of {@code sparkType} from a getter at a
 * given ordinal and returns the object to hand to Avro for {@code avroType}.
 *
 * <p>Only the Spark/Avro type pairs tested below are supported. Integral Spark types are widened
 * to {@code Long} and {@code FloatType} to {@code Double}. Arrays and structs are handled by
 * recursively created converters, and a {@code UserDefinedType} is converted through its
 * underlying SQL type. Apart from array elements (checked when the array type declares
 * {@code containsNull}), the returned converters do not test the value at the ordinal for null.
 *
 * @param sparkType the Catalyst type of the values to convert
 * @param avroType the Avro schema the converted values are produced for
 * @return a converter for the given type pair
 * @throws IllegalArgumentException if the type pair is not supported; the converter returned for
 *     {@code BinaryType}/{@code FIXED} additionally throws it at conversion time when the data
 *     length differs from the fixed size
 */
static Converter createConverterFor(DataType sparkType, Schema avroType) {
  if (sparkType instanceof NullType && avroType.getType() == Schema.Type.NULL) {
    return (getter, ordinal) -> null;
  }
  if (sparkType instanceof BooleanType && avroType.getType() == Schema.Type.BOOLEAN) {
    return (getter, ordinal) -> getter.getBoolean(ordinal);
  }
  // Byte, short and int are all widened to Long to match the Avro LONG schema.
  if (sparkType instanceof ByteType && avroType.getType() == Schema.Type.LONG) {
    return (getter, ordinal) -> Long.valueOf(getter.getByte(ordinal));
  }
  if (sparkType instanceof ShortType && avroType.getType() == Schema.Type.LONG) {
    return (getter, ordinal) -> Long.valueOf(getter.getShort(ordinal));
  }
  if (sparkType instanceof IntegerType && avroType.getType() == Schema.Type.LONG) {
    return (getter, ordinal) -> Long.valueOf(getter.getInt(ordinal));
  }
  if (sparkType instanceof LongType && avroType.getType() == Schema.Type.LONG) {
    return (getter, ordinal) -> getter.getLong(ordinal);
  }
  // Float is widened to Double to match the Avro DOUBLE schema.
  if (sparkType instanceof FloatType && avroType.getType() == Schema.Type.DOUBLE) {
    return (getter, ordinal) -> Double.valueOf(getter.getFloat(ordinal));
  }
  if (sparkType instanceof DoubleType && avroType.getType() == Schema.Type.DOUBLE) {
    return (getter, ordinal) -> getter.getDouble(ordinal);
  }
  if (sparkType instanceof DecimalType && avroType.getType() == Schema.Type.BYTES) {
    DecimalType decimalType = (DecimalType) sparkType;
    int precision = decimalType.precision();
    int scale = decimalType.scale();
    // The logical type depends only on the Spark type, so it is built once here instead of
    // once per converted value inside the lambda.
    LogicalTypes.Decimal avroDecimal = LogicalTypes.decimal(precision, scale);
    return (getter, ordinal) -> {
      Decimal decimal = getter.getDecimal(ordinal, precision, scale);
      return DECIMAL_CONVERSIONS.toBytes(decimal.toJavaBigDecimal(), avroType, avroDecimal);
    };
  }
  if (sparkType instanceof StringType && avroType.getType() == Schema.Type.STRING) {
    return (getter, ordinal) -> new Utf8(getter.getUTF8String(ordinal).getBytes());
  }
  if (sparkType instanceof BinaryType && avroType.getType() == Schema.Type.FIXED) {
    int size = avroType.getFixedSize();
    return (getter, ordinal) -> {
      byte[] data = getter.getBinary(ordinal);
      // A FIXED value must have exactly the declared size.
      if (data.length != size) {
        throw new IllegalArgumentException(String.format("Cannot write %s bytes of binary data into FIXED Type with size of %s bytes", data.length, size));
      }
      return new GenericData.Fixed(avroType, data);
    };
  }
  if (sparkType instanceof BinaryType && avroType.getType() == Schema.Type.BYTES) {
    return (getter, ordinal) -> ByteBuffer.wrap(getter.getBinary(ordinal));
  }
  // Dates and timestamps are passed through as the int/long read from the getter.
  if (sparkType instanceof DateType && avroType.getType() == Schema.Type.INT) {
    return (getter, ordinal) -> getter.getInt(ordinal);
  }
  if (sparkType instanceof TimestampType && avroType.getType() == Schema.Type.LONG) {
    return (getter, ordinal) -> getter.getLong(ordinal);
  }
  if (sparkType instanceof ArrayType && avroType.getType() == Schema.Type.ARRAY) {
    DataType et = ((ArrayType) sparkType).elementType();
    boolean containsNull = ((ArrayType) sparkType).containsNull();
    Converter elementConverter = createConverterFor(et, resolveNullableType(avroType.getElementType(), containsNull));
    return (getter, ordinal) -> {
      ArrayData arrayData = getter.getArray(ordinal);
      int len = arrayData.numElements();
      Object[] result = new Object[len];
      for (int i = 0; i < len; i++) {
        if (containsNull && arrayData.isNullAt(i)) {
          result[i] = null;
        } else {
          result[i] = elementConverter.convert(arrayData, i);
        }
      }
      // `Arrays.asList` returns a list backed by the array, so no data is copied.
      return java.util.Arrays.asList(result);
    };
  }
  if (sparkType instanceof StructType && avroType.getType() == Schema.Type.RECORD) {
    StructType sparkStruct = (StructType) sparkType;
    StructConverter structConverter = new StructConverter(sparkStruct, avroType);
    int numFields = sparkStruct.length();
    return (getter, ordinal) -> structConverter.convert(getter.getStruct(ordinal, numFields));
  }
  if (sparkType instanceof UserDefinedType) {
    // A user-defined type is converted via its underlying SQL type.
    UserDefinedType<?> userDefinedType = (UserDefinedType<?>) sparkType;
    return createConverterFor(userDefinedType.sqlType(), avroType);
  }
  throw new IllegalArgumentException(String.format("Cannot convert Catalyst type %s to Avro type %s", sparkType, avroType));
}
Use of org.apache.spark.sql.types.BinaryType in the spark-bigquery-connector project by GoogleCloudDataproc.
From the class AvroSchemaConverter, method sparkTypeToRawAvroType.
/**
 * Translates a Spark SQL data type into an Avro schema built with the supplied type builder.
 *
 * <p>Integral types all become Avro {@code long} and floating-point types become Avro
 * {@code double}. Decimals, dates and timestamps are emitted as primitives annotated with the
 * matching Avro logical type. Array elements and struct fields are converted through the
 * overload that also takes a nullability flag; a user-defined type is converted through its
 * underlying SQL type.
 *
 * @param dataType the Spark type to translate
 * @param recordName the name given to the Avro record when {@code dataType} is a struct; also
 *     forwarded when converting array element types
 * @param builder the Avro type builder used to create the schema
 * @return the Avro schema for {@code dataType}
 * @throws IllegalArgumentException if the type is a map, is a decimal exceeding the precision or
 *     scale limits defined in {@code SchemaConverters}, or is otherwise unsupported
 */
static Schema sparkTypeToRawAvroType(DataType dataType, String recordName, SchemaBuilder.TypeBuilder<Schema> builder) {
  if (dataType instanceof BinaryType) {
    return builder.bytesType();
  }

  boolean isIntegral =
      dataType instanceof ByteType
          || dataType instanceof ShortType
          || dataType instanceof IntegerType
          || dataType instanceof LongType;
  if (isIntegral) {
    return builder.longType();
  }

  if (dataType instanceof BooleanType) {
    return builder.booleanType();
  }

  boolean isFloatingPoint = dataType instanceof FloatType || dataType instanceof DoubleType;
  if (isFloatingPoint) {
    return builder.doubleType();
  }

  if (dataType instanceof DecimalType) {
    DecimalType decimal = (DecimalType) dataType;
    int precision = decimal.precision();
    int scale = decimal.scale();
    // Reject decimals whose precision or scale exceeds the limits declared in SchemaConverters.
    boolean fitsNumeric =
        precision <= SchemaConverters.BQ_NUMERIC_PRECISION
            && scale <= SchemaConverters.BQ_NUMERIC_SCALE;
    if (!fitsNumeric) {
      throw new IllegalArgumentException("Decimal type is too wide to fit in BigQuery Numeric format");
    }
    return LogicalTypes.decimal(precision, scale).addToSchema(builder.bytesType());
  }

  if (dataType instanceof StringType) {
    return builder.stringType();
  }

  if (dataType instanceof TimestampType) {
    // Timestamps are emitted as an Avro long annotated with the timestamp-micros logical type.
    // NOTE(review): the previous comment here was truncated ("team adds microsecond support to
    // their backend"); confirm the intended caveat against the upstream source.
    return LogicalTypes.timestampMicros().addToSchema(builder.longType());
  }

  if (dataType instanceof DateType) {
    return LogicalTypes.date().addToSchema(builder.intType());
  }

  if (dataType instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) dataType;
    return builder.array().items(sparkTypeToRawAvroType(arrayType.elementType(), arrayType.containsNull(), recordName));
  }

  if (dataType instanceof StructType) {
    StructType structType = (StructType) dataType;
    SchemaBuilder.FieldAssembler<Schema> assembler = builder.record(recordName).fields();
    for (StructField member : structType.fields()) {
      // Each field's own name is passed on as the record name for its schema.
      Schema memberSchema = sparkTypeToRawAvroType(member.dataType(), member.nullable(), member.name());
      assembler.name(member.name()).type(memberSchema).noDefault();
    }
    return assembler.endRecord();
  }

  if (dataType instanceof UserDefinedType) {
    // Unwrap the user-defined type and translate its underlying SQL type instead.
    DataType underlyingType = ((UserDefinedType) dataType).sqlType();
    return sparkTypeToRawAvroType(underlyingType, recordName, builder);
  }

  // Maps get their own dedicated error message; everything else is reported generically.
  if (dataType instanceof MapType) {
    throw new IllegalArgumentException(SchemaConverters.MAPTYPE_ERROR_MESSAGE);
  }
  throw new IllegalArgumentException("Data type not supported: " + dataType.simpleString());
}
Aggregations