use of org.apache.avro.LogicalType in project nifi by apache.
the class AvroTypeUtil method convertToAvroObject.
/**
 * Converts a raw record value into the object representation used for the given Avro schema.
 *
 * <p>The conversion is selected by {@code fieldSchema.getType()}:
 * <ul>
 *   <li>INT / LONG: honours the date, time-millis, time-micros, timestamp-millis and
 *       timestamp-micros logical types; otherwise coerces to Integer / Long.</li>
 *   <li>BYTES / FIXED: honours the decimal logical type; otherwise wraps {@code byte[]} or
 *       converts an {@code Object[]}.</li>
 *   <li>MAP, RECORD, ARRAY: converted element by element; MAP (from a {@code Map}), RECORD and
 *       ARRAY recurse into this method with an extended field name.</li>
 *   <li>UNION: delegated to {@code convertUnionFieldValue}.</li>
 *   <li>BOOLEAN, DOUBLE, FLOAT, STRING, ENUM: direct coercion.</li>
 * </ul>
 *
 * @param rawValue the value to convert; {@code null} yields {@code null}
 * @param fieldSchema the Avro schema the converted value is produced for
 * @param fieldName name (path) of the field, passed to the coercion helpers and extended for nested values
 * @return the converted value, or {@code null} when {@code rawValue} is {@code null} or the schema type is NULL
 * @throws IllegalTypeConversionException if the Java type of {@code rawValue} cannot be used for a
 *         decimal, BYTES/FIXED, MAP, RECORD or ARRAY schema
 */
@SuppressWarnings("unchecked")
private static Object convertToAvroObject(final Object rawValue, final Schema fieldSchema, final String fieldName) {
    if (rawValue == null) {
        return null;
    }

    switch (fieldSchema.getType()) {
        case INT: {
            final LogicalType logicalType = fieldSchema.getLogicalType();
            if (logicalType == null) {
                return DataTypeUtils.toInteger(rawValue, fieldName);
            }

            if (LOGICAL_TYPE_DATE.equals(logicalType.getName())) {
                // Whole days between the epoch and the parsed date.
                final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                final Date date = DataTypeUtils.toDate(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                final Duration duration = Duration.between(new Date(0L).toInstant(), new Date(date.getTime()).toInstant());
                final long days = duration.toDays();
                return (int) days;
            } else if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalType.getName())) {
                // Milliseconds elapsed since the start of the instant's (UTC) day.
                final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                final Time time = DataTypeUtils.toTime(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                final Date date = new Date(time.getTime());
                final Duration duration = Duration.between(date.toInstant().truncatedTo(ChronoUnit.DAYS), date.toInstant());
                final long millisSinceMidnight = duration.toMillis();
                return (int) millisSinceMidnight;
            }

            // Unrecognised logical type: fall back to a plain integer.
            return DataTypeUtils.toInteger(rawValue, fieldName);
        }
        case LONG: {
            final LogicalType logicalType = fieldSchema.getLogicalType();
            if (logicalType == null) {
                return DataTypeUtils.toLong(rawValue, fieldName);
            }

            if (LOGICAL_TYPE_TIME_MICROS.equals(logicalType.getName())) {
                // Time of day in microseconds, derived from a millisecond value (so millisecond precision).
                final long longValue = getLongFromTimestamp(rawValue, fieldSchema, fieldName);
                final Date date = new Date(longValue);
                final Duration duration = Duration.between(date.toInstant().truncatedTo(ChronoUnit.DAYS), date.toInstant());
                return duration.toMillis() * 1000L;
            } else if (LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType.getName())) {
                final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                // NOTE(review): the parsed Timestamp was assigned to a local that was never read. The call
                // is kept in case its failure mode on unparseable input matters; it is presumably redundant
                // with getLongFromTimestamp - confirm and remove.
                DataTypeUtils.toTimestamp(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                return getLongFromTimestamp(rawValue, fieldSchema, fieldName);
            } else if (LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType.getName())) {
                return getLongFromTimestamp(rawValue, fieldSchema, fieldName) * 1000L;
            }

            // Unrecognised logical type: fall back to a plain long.
            return DataTypeUtils.toLong(rawValue, fieldName);
        }
        case BYTES:
        case FIXED: {
            // NOTE(review): FIXED schemas get the same ByteBuffer-based result as BYTES here - confirm
            // that this is what the Avro writer expects for FIXED.
            final LogicalType logicalType = fieldSchema.getLogicalType();
            if (logicalType != null && LOGICAL_TYPE_DECIMAL.equals(logicalType.getName())) {
                final LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
                final BigDecimal rawDecimal;
                if (rawValue instanceof BigDecimal) {
                    rawDecimal = (BigDecimal) rawValue;
                } else if (rawValue instanceof Double) {
                    rawDecimal = BigDecimal.valueOf((Double) rawValue);
                } else if (rawValue instanceof String) {
                    rawDecimal = new BigDecimal((String) rawValue);
                } else if (rawValue instanceof Integer) {
                    rawDecimal = new BigDecimal((Integer) rawValue);
                } else if (rawValue instanceof Long) {
                    rawDecimal = new BigDecimal((Long) rawValue);
                } else {
                    throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a logical decimal");
                }

                // Coerce the value to the schema's scale when it differs, rounding half up.
                final int desiredScale = decimalType.getScale();
                final BigDecimal decimal = rawDecimal.scale() == desiredScale
                        ? rawDecimal
                        : rawDecimal.setScale(desiredScale, java.math.RoundingMode.HALF_UP);
                return new Conversions.DecimalConversion().toBytes(decimal, fieldSchema, logicalType);
            }

            if (rawValue instanceof byte[]) {
                return ByteBuffer.wrap((byte[]) rawValue);
            }
            if (rawValue instanceof Object[]) {
                return AvroTypeUtil.convertByteArray((Object[]) rawValue);
            } else {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a ByteBuffer");
            }
        }
        case MAP:
            if (rawValue instanceof Record) {
                // A Record is flattened into a map of its non-null field values (values are not converted).
                final Record recordValue = (Record) rawValue;
                final Map<String, Object> map = new HashMap<>();
                for (final RecordField recordField : recordValue.getSchema().getFields()) {
                    final Object v = recordValue.getValue(recordField);
                    if (v != null) {
                        map.put(recordField.getFieldName(), v);
                    }
                }
                return map;
            } else if (rawValue instanceof Map) {
                final Map<String, Object> objectMap = (Map<String, Object>) rawValue;
                final Map<String, Object> map = new HashMap<>(objectMap.size());
                // Iterate entries directly instead of looking every key up a second time.
                for (final Map.Entry<String, Object> entry : objectMap.entrySet()) {
                    final String key = entry.getKey();
                    final Object converted = convertToAvroObject(entry.getValue(), fieldSchema.getValueType(), fieldName + "[" + key + "]");
                    map.put(key, converted);
                }
                return map;
            } else {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a Map");
            }
        case RECORD: {
            // Fail with the same exception type as the other cases rather than a bare ClassCastException.
            if (!(rawValue instanceof Record)) {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a Record");
            }

            final GenericData.Record avroRecord = new GenericData.Record(fieldSchema);
            final Record record = (Record) rawValue;
            for (final RecordField recordField : record.getSchema().getFields()) {
                final Object recordFieldValue = record.getValue(recordField);
                final String recordFieldName = recordField.getFieldName();
                final Field field = fieldSchema.getField(recordFieldName);
                if (field == null) {
                    // The Avro schema has no such field; the value is dropped.
                    continue;
                }
                final Object converted = convertToAvroObject(recordFieldValue, field.schema(), fieldName + "/" + recordFieldName);
                avroRecord.put(recordFieldName, converted);
            }
            return avroRecord;
        }
        case UNION:
            return convertUnionFieldValue(rawValue, fieldSchema, schema -> convertToAvroObject(rawValue, schema, fieldName), fieldName);
        case ARRAY: {
            // Fail with the same exception type as the other cases rather than a bare ClassCastException.
            if (!(rawValue instanceof Object[])) {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to an Array");
            }

            final Object[] objectArray = (Object[]) rawValue;
            final List<Object> list = new ArrayList<>(objectArray.length);
            int i = 0;
            for (final Object o : objectArray) {
                final Object converted = convertToAvroObject(o, fieldSchema.getElementType(), fieldName + "[" + i + "]");
                list.add(converted);
                i++;
            }
            return list;
        }
        case BOOLEAN:
            return DataTypeUtils.toBoolean(rawValue, fieldName);
        case DOUBLE:
            return DataTypeUtils.toDouble(rawValue, fieldName);
        case FLOAT:
            return DataTypeUtils.toFloat(rawValue, fieldName);
        case NULL:
            return null;
        case ENUM:
            return new GenericData.EnumSymbol(fieldSchema, rawValue);
        case STRING:
            return DataTypeUtils.toString(rawValue, (String) null);
    }

    // Any schema type not handled above passes the value through untouched.
    return rawValue;
}
use of org.apache.avro.LogicalType in project parquet-mr by apache.
the class AvroSchemaConverter method convertField.
/**
 * Converts a single Avro field schema into the corresponding Parquet type.
 *
 * <p>Nested Avro types (RECORD, ARRAY, MAP, UNION) are converted and returned directly.
 * Primitive-like types select a primitive builder, which is then annotated from the
 * schema's logical type (if any) and named after the field.
 *
 * @param fieldName name given to the resulting Parquet type
 * @param schema Avro schema of the field
 * @param repetition repetition applied to the resulting Parquet type
 * @return the Parquet type for the field
 * @throws UnsupportedOperationException if the Avro type has no conversion here
 */
@SuppressWarnings("deprecation")
private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) {
    final Schema.Type avroType = schema.getType();
    Types.PrimitiveBuilder<PrimitiveType> builder;

    switch (avroType) {
        case BOOLEAN:
            builder = Types.primitive(BOOLEAN, repetition);
            break;
        case INT:
            builder = Types.primitive(INT32, repetition);
            break;
        case LONG:
            builder = Types.primitive(INT64, repetition);
            break;
        case FLOAT:
            builder = Types.primitive(FLOAT, repetition);
            break;
        case DOUBLE:
            builder = Types.primitive(DOUBLE, repetition);
            break;
        case BYTES:
            builder = Types.primitive(BINARY, repetition);
            break;
        case STRING:
            builder = Types.primitive(BINARY, repetition).as(UTF8);
            break;
        case ENUM:
            builder = Types.primitive(BINARY, repetition).as(ENUM);
            break;
        case FIXED:
            builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition).length(schema.getFixedSize());
            break;
        case RECORD:
            return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
        case ARRAY:
            if (writeOldListStructure) {
                return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED));
            }
            return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType()));
        case MAP: {
            // An Avro map key is always a string, so only the value type needs converting.
            final Type mapValueType = convertField("value", schema.getValueType());
            return ConversionPatterns.stringKeyMapType(repetition, fieldName, mapValueType);
        }
        case UNION:
            return convertUnion(fieldName, schema, repetition);
        default:
            throw new UnsupportedOperationException("Cannot convert Avro type " + avroType);
    }

    // Only known logical types are translated, because translating one declares
    // the Avro and Parquet annotations to be equivalent.
    final LogicalType logical = schema.getLogicalType();
    if (logical instanceof LogicalTypes.Decimal) {
        final LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logical;
        builder = builder.as(DECIMAL).precision(decimal.getPrecision()).scale(decimal.getScale());
    } else if (logical != null) {
        final OriginalType mapped = convertLogicalType(logical);
        if (mapped != null) {
            builder.as(mapped);
        }
    }

    return builder.named(fieldName);
}
use of org.apache.avro.LogicalType in project parquet-mr by apache.
the class AvroSchemaConverter method convertField.
/**
 * Converts a Parquet type into the corresponding Avro schema.
 *
 * <p>Primitive types map to the matching Avro primitive (BINARY becomes STRING when annotated
 * UTF8 or ENUM, otherwise BYTES), and a logical type derived from the annotation is attached
 * when one applies. Group types are converted according to their original type: LIST to an
 * Avro array, MAP / MAP_KEY_VALUE to an Avro map, ENUM to a string; a group without an
 * original type is converted as a record.
 *
 * @param parquetType the Parquet type to convert
 * @return the Avro schema for {@code parquetType}
 * @throws UnsupportedOperationException if a list or map group is malformed, or the group's
 *         original type has no conversion
 * @throws IllegalArgumentException if the type is INT96, or a map key is not binary (UTF8)
 */
private Schema convertField(final Type parquetType) {
    if (parquetType.isPrimitive()) {
        final PrimitiveType asPrimitive = parquetType.asPrimitiveType();
        final PrimitiveTypeName parquetPrimitiveTypeName = asPrimitive.getPrimitiveTypeName();
        final OriginalType annotation = parquetType.getOriginalType();
        Schema schema = parquetPrimitiveTypeName.convert(new PrimitiveType.PrimitiveTypeNameConverter<Schema, RuntimeException>() {
            @Override
            public Schema convertBOOLEAN(PrimitiveTypeName primitiveTypeName) {
                return Schema.create(Schema.Type.BOOLEAN);
            }

            @Override
            public Schema convertINT32(PrimitiveTypeName primitiveTypeName) {
                return Schema.create(Schema.Type.INT);
            }

            @Override
            public Schema convertINT64(PrimitiveTypeName primitiveTypeName) {
                return Schema.create(Schema.Type.LONG);
            }

            @Override
            public Schema convertINT96(PrimitiveTypeName primitiveTypeName) {
                throw new IllegalArgumentException("INT96 not yet implemented.");
            }

            @Override
            public Schema convertFLOAT(PrimitiveTypeName primitiveTypeName) {
                return Schema.create(Schema.Type.FLOAT);
            }

            @Override
            public Schema convertDOUBLE(PrimitiveTypeName primitiveTypeName) {
                return Schema.create(Schema.Type.DOUBLE);
            }

            @Override
            public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) {
                int size = parquetType.asPrimitiveType().getTypeLength();
                return Schema.createFixed(parquetType.getName(), null, null, size);
            }

            @Override
            public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
                if (annotation == OriginalType.UTF8 || annotation == OriginalType.ENUM) {
                    return Schema.create(Schema.Type.STRING);
                } else {
                    return Schema.create(Schema.Type.BYTES);
                }
            }
        });

        // A DECIMAL annotation is only carried over when the physical type is BINARY or
        // FIXED_LEN_BYTE_ARRAY; every other derived logical type is always attached.
        LogicalType logicalType = convertOriginalType(annotation, asPrimitive.getDecimalMetadata());
        if (logicalType != null && (annotation != DECIMAL || parquetPrimitiveTypeName == BINARY || parquetPrimitiveTypeName == FIXED_LEN_BYTE_ARRAY)) {
            schema = logicalType.addToSchema(schema);
        }
        return schema;
    } else {
        GroupType parquetGroupType = parquetType.asGroupType();
        OriginalType originalType = parquetGroupType.getOriginalType();
        if (originalType != null) {
            switch (originalType) {
                case LIST:
                    if (parquetGroupType.getFieldCount() != 1) {
                        throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
                    }
                    Type repeatedType = parquetGroupType.getType(0);
                    if (!repeatedType.isRepetition(REPEATED)) {
                        throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
                    }
                    if (isElementType(repeatedType, parquetGroupType.getName())) {
                        // repeated element types are always required
                        return Schema.createArray(convertField(repeatedType));
                    } else {
                        // The repeated group only wraps the real element type.
                        Type elementType = repeatedType.asGroupType().getType(0);
                        if (elementType.isRepetition(Type.Repetition.OPTIONAL)) {
                            return Schema.createArray(optional(convertField(elementType)));
                        } else {
                            return Schema.createArray(convertField(elementType));
                        }
                    }
                // MAP_KEY_VALUE is handled like MAP for backward-compatibility
                case MAP_KEY_VALUE:
                case MAP:
                    if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
                        throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
                    }
                    GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
                    if (!mapKeyValType.isRepetition(REPEATED) || mapKeyValType.getFieldCount() != 2) {
                        throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
                    }
                    Type keyType = mapKeyValType.getType(0);
                    // Identity comparison keeps this null-safe: a key without any annotation must be
                    // rejected with the IllegalArgumentException below, not a NullPointerException.
                    if (!keyType.isPrimitive()
                            || !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveTypeName.BINARY)
                            || keyType.getOriginalType() != OriginalType.UTF8) {
                        throw new IllegalArgumentException("Map key type must be binary (UTF8): " + keyType);
                    }
                    Type valueType = mapKeyValType.getType(1);
                    if (valueType.isRepetition(Type.Repetition.OPTIONAL)) {
                        return Schema.createMap(optional(convertField(valueType)));
                    } else {
                        return Schema.createMap(convertField(valueType));
                    }
                case ENUM:
                    return Schema.create(Schema.Type.STRING);
                case UTF8:
                default:
                    throw new UnsupportedOperationException("Cannot convert Parquet type " + parquetType);
            }
        } else {
            // if no original type then it's a record
            return convertFields(parquetGroupType.getName(), parquetGroupType.getFields());
        }
    }
}
use of org.apache.avro.LogicalType in project parquet-mr by apache.
the class AvroRecordConverter method newConverter.
/**
 * Creates the Parquet converter that materializes values for the given Avro schema.
 *
 * <p>A conversion is looked up from the model (by {@code knownClass} when one is supplied,
 * otherwise by logical type alone) and wrapped around {@code setter}; the converter itself
 * is then chosen from the Avro schema type and, for INT, BYTES and ARRAY, from the datum class.
 *
 * @param schema Avro schema of the value being read
 * @param type Parquet type of the value being read
 * @param model data model supplying conversions
 * @param knownClass expected Java class of the value, or {@code null} if not known
 * @param setter container that receives the converted value
 * @return a converter for the schema
 * @throws UnsupportedOperationException if the Avro schema type has no converter here
 */
private static Converter newConverter(Schema schema, Type type, GenericData model, Class<?> knownClass, ParentValueContainer setter) {
    final LogicalType logicalType = schema.getLogicalType();
    final Conversion<?> conversion = (knownClass != null)
            ? model.getConversionByClass(knownClass, logicalType)
            : model.getConversionFor(logicalType);
    final ParentValueContainer parent = ParentValueContainer.getConversionContainer(setter, conversion, schema);

    switch (schema.getType()) {
        case BOOLEAN:
            return new AvroConverters.FieldBooleanConverter(parent);
        case INT: {
            // Narrower integral datum classes get their own converter; everything else is an int.
            final Class<?> target = getDatumClass(conversion, knownClass, schema, model);
            if (target == byte.class || target == Byte.class) {
                return new AvroConverters.FieldByteConverter(parent);
            }
            if (target == short.class || target == Short.class) {
                return new AvroConverters.FieldShortConverter(parent);
            }
            if (target == char.class || target == Character.class) {
                return new AvroConverters.FieldCharConverter(parent);
            }
            return new AvroConverters.FieldIntegerConverter(parent);
        }
        case LONG:
            return new AvroConverters.FieldLongConverter(parent);
        case FLOAT:
            return new AvroConverters.FieldFloatConverter(parent);
        case DOUBLE:
            return new AvroConverters.FieldDoubleConverter(parent);
        case BYTES: {
            // Only a byte[] datum class switches away from the ByteBuffer converter.
            final Class<?> target = getDatumClass(conversion, knownClass, schema, model);
            final boolean wantsByteArray = target != null && target.isArray() && target.getComponentType() == byte.class;
            if (wantsByteArray) {
                return new AvroConverters.FieldByteArrayConverter(parent);
            }
            return new AvroConverters.FieldByteBufferConverter(parent);
        }
        case STRING:
            return newStringConverter(schema, model, parent);
        case RECORD:
            return new AvroRecordConverter(parent, type.asGroupType(), schema, model);
        case ENUM:
            return new AvroConverters.FieldEnumConverter(parent, schema, model);
        case ARRAY: {
            final Class<?> target = getDatumClass(conversion, knownClass, schema, model);
            if (target == null || !target.isArray()) {
                return new AvroCollectionConverter(parent, type.asGroupType(), schema, model, target);
            }
            return new AvroArrayConverter(parent, type.asGroupType(), schema, model, target);
        }
        case MAP:
            return new MapConverter(parent, type.asGroupType(), schema, model);
        case UNION:
            return new AvroUnionConverter(parent, type, schema, model);
        case FIXED:
            return new AvroConverters.FieldFixedConverter(parent, schema, model);
        default:
            break;
    }

    throw new UnsupportedOperationException(String.format("Cannot convert Avro type: %s to Parquet type: %s", schema, type));
}
use of org.apache.avro.LogicalType in project tdi-studio-se by Talend.
the class DiOutgoingSchemaEnforcer method transformValue.
/**
 * Transforms a record column value from its Avro representation into a Talend-compatible value.
 *
 * <p>Logical types are handled first: a date becomes a {@link Date} computed as that many days
 * after the epoch in GMT, a time-millis value is returned as is, and a timestamp-millis value
 * becomes a {@link Date}. Otherwise the value is coerced according to the field's Talend type
 * property or the schema's Java class property; values matching neither are returned unchanged.
 *
 * @param value record column value to transform; may be {@code null}, in which case
 *        {@code null} is returned
 * @param valueField field describing the value's schema and Talend type; must not be {@code null}
 * @return the Talend-compatible value
 */
protected Object transformValue(Object value, Field valueField) {
    if (value == null) {
        return null;
    }

    final Schema unwrapped = AvroUtils.unwrapIfNullable(valueField.schema());
    final LogicalType logical = unwrapped.getLogicalType();
    if (logical != null) {
        if (logical == LogicalTypes.date()) {
            // Start at the epoch in GMT and advance by the stored day count.
            final Calendar epochCalendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
            epochCalendar.setTimeInMillis(0L);
            epochCalendar.add(Calendar.DATE, (Integer) value);
            return epochCalendar.getTime();
        }
        if (logical == LogicalTypes.timeMillis()) {
            return value;
        }
        if (logical == LogicalTypes.timestampMillis()) {
            return new Date((Long) value);
        }
    }

    // These properties might not always have been specified.
    final String talendType = valueField.getProp(TALEND6_COLUMN_TALEND_TYPE);
    final String javaClass = unwrapped.getProp(SchemaConstants.JAVA_CLASS_FLAG);

    // TODO(rskraba): A full list of type conversion to coerce to Talend-compatible types.
    if ("id_Short".equals(talendType)) { //$NON-NLS-1$
        if (value instanceof Number) {
            return ((Number) value).shortValue();
        }
        return Short.parseShort(String.valueOf(value));
    }

    if ("id_Date".equals(talendType) || "java.util.Date".equals(javaClass)) {
        // FIXME - remove this mapping in favor of using Avro logical types
        if (value instanceof Date) {
            return value;
        }
        return new Date((Long) value);
    }

    if ("id_Byte".equals(talendType)) { //$NON-NLS-1$
        if (value instanceof Number) {
            return ((Number) value).byteValue();
        }
        return Byte.parseByte(String.valueOf(value));
    }

    if ("id_Character".equals(talendType) || "java.lang.Character".equals(javaClass)) {
        if (value instanceof Character) {
            return value;
        }
        return ((String) value).charAt(0);
    }

    if ("id_BigDecimal".equals(talendType) || "java.math.BigDecimal".equals(javaClass)) {
        if (value instanceof BigDecimal) {
            return value;
        }
        return new BigDecimal(String.valueOf(value));
    }

    return value;
}
Aggregations