
Example 86 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

From the AvroTypeUtil class, method lookupField.

/**
 * Method that attempts to map a record field into a provided schema
 * @param avroSchema - Schema to map into
 * @param recordField - The field of the record to be mapped
 * @return Pair with the LHS being the field name and RHS being the mapped field from the schema
 */
protected static Pair<String, Field> lookupField(final Schema avroSchema, final RecordField recordField) {
    String fieldName = recordField.getFieldName();
    // Attempt to locate the field as is in a true 1:1 mapping with the same name
    Field field = avroSchema.getField(fieldName);
    if (field == null) {
        // No straight mapping was found, so check the aliases to see if it can be mapped
        for (final String alias : recordField.getAliases()) {
            field = avroSchema.getField(alias);
            if (field != null) {
                fieldName = alias;
                break;
            }
        }
    }
    return new ImmutablePair<>(fieldName, field);
}
Also used : Field(org.apache.avro.Schema.Field) RecordField(org.apache.nifi.serialization.record.RecordField) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)
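
For context, the following is a minimal usage sketch rather than code from the nifi project: it assumes a caller placed in the org.apache.nifi.avro package (lookupField is protected), and the Person schema, the LookupFieldSketch class, and the "full_name" alias are purely illustrative.

package org.apache.nifi.avro;

import java.util.Collections;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;

public class LookupFieldSketch {

    public static void main(final String[] args) {
        // Avro schema whose only field is named "full_name"
        final Schema avroSchema = SchemaBuilder.record("Person").fields()
                .requiredString("full_name")
                .endRecord();

        // RecordField named "name" that carries "full_name" as an alias
        final RecordField recordField = new RecordField("name",
                RecordFieldType.STRING.getDataType(), Collections.singleton("full_name"));

        // No Avro field named "name" exists, so the alias is matched instead
        final Pair<String, Schema.Field> pair = AvroTypeUtil.lookupField(avroSchema, recordField);
        System.out.println(pair.getLeft());  // prints "full_name"
    }
}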

Example 87 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

From the AvroTypeUtil class, method convertAvroRecordToMap.

public static Map<String, Object> convertAvroRecordToMap(final GenericRecord avroRecord, final RecordSchema recordSchema) {
    final Map<String, Object> values = new HashMap<>(recordSchema.getFieldCount());
    for (final RecordField recordField : recordSchema.getFields()) {
        Object value = avroRecord.get(recordField.getFieldName());
        if (value == null) {
            for (final String alias : recordField.getAliases()) {
                value = avroRecord.get(alias);
                if (value != null) {
                    break;
                }
            }
        }
        final String fieldName = recordField.getFieldName();
        try {
            final Field avroField = avroRecord.getSchema().getField(fieldName);
            if (avroField == null) {
                values.put(fieldName, null);
                continue;
            }
            final Schema fieldSchema = avroField.schema();
            final Object rawValue = normalizeValue(value, fieldSchema, fieldName);
            final DataType desiredType = recordField.getDataType();
            final Object coercedValue = DataTypeUtils.convertType(rawValue, desiredType, fieldName);
            values.put(fieldName, coercedValue);
        } catch (Exception ex) {
            logger.debug("fail to convert field " + fieldName, ex);
            throw ex;
        }
    }
    return values;
}
Also used : Field(org.apache.avro.Schema.Field) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) IllegalTypeConversionException(org.apache.nifi.serialization.record.util.IllegalTypeConversionException) IOException(java.io.IOException)
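
A hedged usage sketch (the User schema, the field values, and the ConvertAvroRecordToMapSketch class are illustrative, not taken from the nifi codebase) showing how a GenericRecord is flattened into a Java map, with each value coerced to the data type declared by the matching RecordField.

import java.util.Arrays;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

public class ConvertAvroRecordToMapSketch {

    public static void main(final String[] args) {
        final Schema avroSchema = SchemaBuilder.record("User").fields()
                .requiredString("name")
                .requiredInt("age")
                .endRecord();

        final GenericRecord avroRecord = new GenericData.Record(avroSchema);
        avroRecord.put("name", "Alice");
        avroRecord.put("age", 30);

        // NiFi schema that mirrors the Avro schema; values are coerced to these types
        final RecordSchema recordSchema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("name", RecordFieldType.STRING.getDataType()),
                new RecordField("age", RecordFieldType.INT.getDataType())));

        final Map<String, Object> values = AvroTypeUtil.convertAvroRecordToMap(avroRecord, recordSchema);
        System.out.println(values); // e.g. {name=Alice, age=30} (HashMap, so order is not guaranteed)
    }
}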

Example 88 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

From the AvroTypeUtil class, method createAvroRecord.

public static GenericRecord createAvroRecord(final Record record, final Schema avroSchema) throws IOException {
    final GenericRecord rec = new GenericData.Record(avroSchema);
    final RecordSchema recordSchema = record.getSchema();
    for (final RecordField recordField : recordSchema.getFields()) {
        final Object rawValue = record.getValue(recordField);
        Pair<String, Field> fieldPair = lookupField(avroSchema, recordField);
        final String fieldName = fieldPair.getLeft();
        final Field field = fieldPair.getRight();
        if (field == null) {
            continue;
        }
        final Object converted = convertToAvroObject(rawValue, field.schema(), fieldName);
        rec.put(fieldName, converted);
    }
    // If the Avro schema has any fields that are not present in the RecordSchema, and those fields
    // have a default value, then we want to populate it in the GenericRecord being produced
    for (final Field field : avroSchema.getFields()) {
        final Optional<RecordField> recordField = recordSchema.getField(field.name());
        if (!recordField.isPresent() && rec.get(field.name()) == null && field.defaultVal() != null) {
            rec.put(field.name(), field.defaultVal());
        }
    }
    return rec;
}
Also used : Field(org.apache.avro.Schema.Field) RecordField(org.apache.nifi.serialization.record.RecordField) SpecificRecord(org.apache.avro.specific.SpecificRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) GenericRecord(org.apache.avro.generic.GenericRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)
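
A hedged usage sketch (the User schema, the "country" default, and the CreateAvroRecordSketch class are illustrative) showing both halves of the method: values copied from the NiFi Record, and an Avro-only field populated from its declared default.

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

public class CreateAvroRecordSketch {

    public static void main(final String[] args) throws IOException {
        final RecordSchema recordSchema = new SimpleRecordSchema(Collections.singletonList(
                new RecordField("name", RecordFieldType.STRING.getDataType())));

        final Map<String, Object> values = new HashMap<>();
        values.put("name", "Alice");
        final Record record = new MapRecord(recordSchema, values);

        // Avro schema with an extra field that declares a default value
        final Schema avroSchema = SchemaBuilder.record("User").fields()
                .requiredString("name")
                .name("country").type().stringType().stringDefault("unknown")
                .endRecord();

        final GenericRecord avroRecord = AvroTypeUtil.createAvroRecord(record, avroSchema);
        System.out.println(avroRecord.get("name"));    // Alice
        System.out.println(avroRecord.get("country")); // unknown (taken from the Avro default)
    }
}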

Example 89 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

From the AvroTypeUtil class, method buildAvroSchema.

private static Schema buildAvroSchema(final DataType dataType, final String fieldName, final boolean nullable) {
    final Schema schema;
    switch(dataType.getFieldType()) {
        case ARRAY:
            final ArrayDataType arrayDataType = (ArrayDataType) dataType;
            final DataType elementDataType = arrayDataType.getElementType();
            if (RecordFieldType.BYTE.equals(elementDataType.getFieldType())) {
                schema = Schema.create(Type.BYTES);
            } else {
                final Schema elementType = buildAvroSchema(elementDataType, fieldName, false);
                schema = Schema.createArray(elementType);
            }
            break;
        case BIGINT:
            schema = Schema.create(Type.STRING);
            break;
        case BOOLEAN:
            schema = Schema.create(Type.BOOLEAN);
            break;
        case BYTE:
            schema = Schema.create(Type.INT);
            break;
        case CHAR:
            schema = Schema.create(Type.STRING);
            break;
        case CHOICE:
            final ChoiceDataType choiceDataType = (ChoiceDataType) dataType;
            final List<DataType> options = choiceDataType.getPossibleSubTypes();
            // We need to keep track of which types have been added to the union, because if we have
            // two elements in the UNION with the same type, it will fail - even if the logical type is
            // different. So if we have an int and a logical type date (which also has a 'concrete type' of int)
            // then an Exception will be thrown when we try to create the union. To avoid this, we just keep track
            // of the Types and avoid adding it in such a case.
            final List<Schema> unionTypes = new ArrayList<>(options.size());
            final Set<Type> typesAdded = new HashSet<>();
            for (final DataType option : options) {
                final Schema optionSchema = buildAvroSchema(option, fieldName, false);
                if (!typesAdded.contains(optionSchema.getType())) {
                    unionTypes.add(optionSchema);
                    typesAdded.add(optionSchema.getType());
                }
            }
            schema = Schema.createUnion(unionTypes);
            break;
        case DATE:
            schema = Schema.create(Type.INT);
            LogicalTypes.date().addToSchema(schema);
            break;
        case DOUBLE:
            schema = Schema.create(Type.DOUBLE);
            break;
        case FLOAT:
            schema = Schema.create(Type.FLOAT);
            break;
        case INT:
            schema = Schema.create(Type.INT);
            break;
        case LONG:
            schema = Schema.create(Type.LONG);
            break;
        case MAP:
            schema = Schema.createMap(buildAvroSchema(((MapDataType) dataType).getValueType(), fieldName, false));
            break;
        case RECORD:
            final RecordDataType recordDataType = (RecordDataType) dataType;
            final RecordSchema childSchema = recordDataType.getChildSchema();
            final List<Field> childFields = new ArrayList<>(childSchema.getFieldCount());
            for (final RecordField field : childSchema.getFields()) {
                childFields.add(buildAvroField(field));
            }
            schema = Schema.createRecord(fieldName + "Type", null, "org.apache.nifi", false, childFields);
            break;
        case SHORT:
            schema = Schema.create(Type.INT);
            break;
        case STRING:
            schema = Schema.create(Type.STRING);
            break;
        case TIME:
            schema = Schema.create(Type.INT);
            LogicalTypes.timeMillis().addToSchema(schema);
            break;
        case TIMESTAMP:
            schema = Schema.create(Type.LONG);
            LogicalTypes.timestampMillis().addToSchema(schema);
            break;
        default:
            return null;
    }
    if (nullable) {
        return nullable(schema);
    } else {
        return schema;
    }
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ArrayList(java.util.ArrayList) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) Field(org.apache.avro.Schema.Field) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) Type(org.apache.avro.Schema.Type) LogicalType(org.apache.avro.LogicalType) HashSet(java.util.HashSet)
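
buildAvroSchema is private, so the sketch below exercises it indirectly through the public AvroTypeUtil.extractAvroSchema(RecordSchema) entry point, which is assumed here to delegate to this builder for each field's data type; the field names and the BuildAvroSchemaSketch class are illustrative, not code from the nifi project.

import java.util.Arrays;

import org.apache.avro.Schema;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

public class BuildAvroSchemaSketch {

    public static void main(final String[] args) {
        final RecordSchema recordSchema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("name", RecordFieldType.STRING.getDataType()),
                new RecordField("birthday", RecordFieldType.DATE.getDataType()),
                new RecordField("score", RecordFieldType.DOUBLE.getDataType())));

        // DATE maps to an Avro INT with the "date" logical type, per the switch above
        final Schema avroSchema = AvroTypeUtil.extractAvroSchema(recordSchema);
        System.out.println(avroSchema.toString(true));
    }
}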

Example 90 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

From the AvroTypeUtil class, method convertToAvroObject.

@SuppressWarnings("unchecked")
private static Object convertToAvroObject(final Object rawValue, final Schema fieldSchema, final String fieldName) {
    if (rawValue == null) {
        return null;
    }
    switch(fieldSchema.getType()) {
        case INT:
            {
                final LogicalType logicalType = fieldSchema.getLogicalType();
                if (logicalType == null) {
                    return DataTypeUtils.toInteger(rawValue, fieldName);
                }
                if (LOGICAL_TYPE_DATE.equals(logicalType.getName())) {
                    final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                    final Date date = DataTypeUtils.toDate(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                    final Duration duration = Duration.between(new Date(0L).toInstant(), new Date(date.getTime()).toInstant());
                    final long days = duration.toDays();
                    return (int) days;
                } else if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalType.getName())) {
                    final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                    final Time time = DataTypeUtils.toTime(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                    final Date date = new Date(time.getTime());
                    final Duration duration = Duration.between(date.toInstant().truncatedTo(ChronoUnit.DAYS), date.toInstant());
                    final long millisSinceMidnight = duration.toMillis();
                    return (int) millisSinceMidnight;
                }
                return DataTypeUtils.toInteger(rawValue, fieldName);
            }
        case LONG:
            {
                final LogicalType logicalType = fieldSchema.getLogicalType();
                if (logicalType == null) {
                    return DataTypeUtils.toLong(rawValue, fieldName);
                }
                if (LOGICAL_TYPE_TIME_MICROS.equals(logicalType.getName())) {
                    final long longValue = getLongFromTimestamp(rawValue, fieldSchema, fieldName);
                    final Date date = new Date(longValue);
                    final Duration duration = Duration.between(date.toInstant().truncatedTo(ChronoUnit.DAYS), date.toInstant());
                    return duration.toMillis() * 1000L;
                } else if (LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalType.getName())) {
                    final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
                    Timestamp t = DataTypeUtils.toTimestamp(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
                    return getLongFromTimestamp(rawValue, fieldSchema, fieldName);
                } else if (LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalType.getName())) {
                    return getLongFromTimestamp(rawValue, fieldSchema, fieldName) * 1000L;
                }
                return DataTypeUtils.toLong(rawValue, fieldName);
            }
        case BYTES:
        case FIXED:
            final LogicalType logicalType = fieldSchema.getLogicalType();
            if (logicalType != null && LOGICAL_TYPE_DECIMAL.equals(logicalType.getName())) {
                final LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
                final BigDecimal rawDecimal;
                if (rawValue instanceof BigDecimal) {
                    rawDecimal = (BigDecimal) rawValue;
                } else if (rawValue instanceof Double) {
                    rawDecimal = BigDecimal.valueOf((Double) rawValue);
                } else if (rawValue instanceof String) {
                    rawDecimal = new BigDecimal((String) rawValue);
                } else if (rawValue instanceof Integer) {
                    rawDecimal = new BigDecimal((Integer) rawValue);
                } else if (rawValue instanceof Long) {
                    rawDecimal = new BigDecimal((Long) rawValue);
                } else {
                    throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a logical decimal");
                }
                // If the desired scale is different than this value's scale, coerce it to the desired scale.
                final int desiredScale = decimalType.getScale();
                final BigDecimal decimal = rawDecimal.scale() == desiredScale ? rawDecimal : rawDecimal.setScale(desiredScale, BigDecimal.ROUND_HALF_UP);
                return new Conversions.DecimalConversion().toBytes(decimal, fieldSchema, logicalType);
            }
            if (rawValue instanceof byte[]) {
                return ByteBuffer.wrap((byte[]) rawValue);
            }
            if (rawValue instanceof Object[]) {
                return AvroTypeUtil.convertByteArray((Object[]) rawValue);
            } else {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a ByteBuffer");
            }
        case MAP:
            if (rawValue instanceof Record) {
                final Record recordValue = (Record) rawValue;
                final Map<String, Object> map = new HashMap<>();
                for (final RecordField recordField : recordValue.getSchema().getFields()) {
                    final Object v = recordValue.getValue(recordField);
                    if (v != null) {
                        map.put(recordField.getFieldName(), v);
                    }
                }
                return map;
            } else if (rawValue instanceof Map) {
                final Map<String, Object> objectMap = (Map<String, Object>) rawValue;
                final Map<String, Object> map = new HashMap<>(objectMap.size());
                for (final String s : objectMap.keySet()) {
                    final Object converted = convertToAvroObject(objectMap.get(s), fieldSchema.getValueType(), fieldName + "[" + s + "]");
                    map.put(s, converted);
                }
                return map;
            } else {
                throw new IllegalTypeConversionException("Cannot convert value " + rawValue + " of type " + rawValue.getClass() + " to a Map");
            }
        case RECORD:
            final GenericData.Record avroRecord = new GenericData.Record(fieldSchema);
            final Record record = (Record) rawValue;
            for (final RecordField recordField : record.getSchema().getFields()) {
                final Object recordFieldValue = record.getValue(recordField);
                final String recordFieldName = recordField.getFieldName();
                final Field field = fieldSchema.getField(recordFieldName);
                if (field == null) {
                    continue;
                }
                final Object converted = convertToAvroObject(recordFieldValue, field.schema(), fieldName + "/" + recordFieldName);
                avroRecord.put(recordFieldName, converted);
            }
            return avroRecord;
        case UNION:
            return convertUnionFieldValue(rawValue, fieldSchema, schema -> convertToAvroObject(rawValue, schema, fieldName), fieldName);
        case ARRAY:
            final Object[] objectArray = (Object[]) rawValue;
            final List<Object> list = new ArrayList<>(objectArray.length);
            int i = 0;
            for (final Object o : objectArray) {
                final Object converted = convertToAvroObject(o, fieldSchema.getElementType(), fieldName + "[" + i + "]");
                list.add(converted);
                i++;
            }
            return list;
        case BOOLEAN:
            return DataTypeUtils.toBoolean(rawValue, fieldName);
        case DOUBLE:
            return DataTypeUtils.toDouble(rawValue, fieldName);
        case FLOAT:
            return DataTypeUtils.toFloat(rawValue, fieldName);
        case NULL:
            return null;
        case ENUM:
            return new GenericData.EnumSymbol(fieldSchema, rawValue);
        case STRING:
            return DataTypeUtils.toString(rawValue, (String) null);
    }
    return rawValue;
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LogicalType(org.apache.avro.LogicalType) Time(java.sql.Time) Timestamp(java.sql.Timestamp) Field(org.apache.avro.Schema.Field) BigDecimal(java.math.BigDecimal) IllegalTypeConversionException(org.apache.nifi.serialization.record.util.IllegalTypeConversionException) SpecificRecord(org.apache.avro.specific.SpecificRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) GenericRecord(org.apache.avro.generic.GenericRecord) LogicalTypes(org.apache.avro.LogicalTypes) Duration(java.time.Duration) GenericData(org.apache.avro.generic.GenericData) Date(java.util.Date) Conversions(org.apache.avro.Conversions) Map(java.util.Map)
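
convertToAvroObject is likewise private; the sketch below drives its decimal branch indirectly through createAvroRecord (Example 88). The Invoice schema, the decimal(10, 2) field, and the DecimalConversionSketch class are illustrative assumptions, not code from the nifi project.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

public class DecimalConversionSketch {

    public static void main(final String[] args) throws IOException {
        // Avro field: bytes carrying a decimal(10, 2) logical type
        final Schema decimalSchema = LogicalTypes.decimal(10, 2)
                .addToSchema(Schema.create(Schema.Type.BYTES));
        final Schema avroSchema = SchemaBuilder.record("Invoice").fields()
                .name("amount").type(decimalSchema).noDefault()
                .endRecord();

        // The NiFi record carries the amount as a plain double
        final RecordSchema recordSchema = new SimpleRecordSchema(Collections.singletonList(
                new RecordField("amount", RecordFieldType.DOUBLE.getDataType())));
        final Map<String, Object> values = new HashMap<>();
        values.put("amount", 12.5d);
        final Record record = new MapRecord(recordSchema, values);

        // The double is rescaled to 2 decimal places and encoded as a ByteBuffer
        final GenericRecord avroRecord = AvroTypeUtil.createAvroRecord(record, avroSchema);
        final ByteBuffer encoded = (ByteBuffer) avroRecord.get("amount");
        System.out.println(encoded.remaining() + " bytes encode the decimal 12.50");
    }
}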

Aggregations

RecordField (org.apache.nifi.serialization.record.RecordField): 173 usages
SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema): 133 usages
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 130 usages
ArrayList (java.util.ArrayList): 116 usages
Test (org.junit.Test): 108 usages
Record (org.apache.nifi.serialization.record.Record): 97 usages
MapRecord (org.apache.nifi.serialization.record.MapRecord): 73 usages
HashMap (java.util.HashMap): 52 usages
InputStream (java.io.InputStream): 48 usages
FileInputStream (java.io.FileInputStream): 44 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 43 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 39 usages
DataType (org.apache.nifi.serialization.record.DataType): 37 usages
LinkedHashMap (java.util.LinkedHashMap): 36 usages
File (java.io.File): 21 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20 usages
SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute): 17 usages
RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType): 17 usages
Schema (org.apache.avro.Schema): 16 usages
RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType): 16 usages