Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.
Class AvroTypeUtil, method createSchema.
/**
 * Builds a {@link RecordSchema} from an Avro {@link Schema}.
 * <p>
 * The record type of the schema being built is registered under its
 * {@code namespace.name} key before any field is resolved, so that
 * {@code determineDataType} returns that same type when a field refers back
 * to the enclosing record.
 *
 * @param avroSchema the Avro Schema to convert
 * @param schemaText the textual representation of the schema
 * @param schemaId the identifier of the schema
 * @return the corresponding Record Schema
 * @throws IllegalArgumentException if {@code avroSchema} is {@code null}
 */
public static RecordSchema createSchema(final Schema avroSchema, final String schemaText, final SchemaIdentifier schemaId) {
    if (avroSchema == null) {
        throw new IllegalArgumentException("Avro Schema cannot be null");
    }

    final String rootRecordKey = avroSchema.getNamespace() + "." + avroSchema.getName();
    final SimpleRecordSchema result = new SimpleRecordSchema(schemaText, AVRO_SCHEMA_FORMAT, schemaId);

    // Seed the lookup with the root record so nested references to it are not rebuilt.
    final Map<String, DataType> resolvedRecords = new HashMap<>();
    resolvedRecords.put(rootRecordKey, RecordFieldType.RECORD.getRecordDataType(result));

    final List<Field> avroFields = avroSchema.getFields();
    final List<RecordField> convertedFields = new ArrayList<>(avroFields.size());
    for (final Field avroField : avroFields) {
        final Schema avroFieldSchema = avroField.schema();
        final DataType convertedType = AvroTypeUtil.determineDataType(avroFieldSchema, resolvedRecords);
        addFieldToList(convertedFields, avroField, avroField.name(), avroFieldSchema, convertedType, isNullable(avroFieldSchema));
    }

    // The fields are attached last; the record data type registered above refers to this same schema instance.
    result.setFields(convertedFields);
    return result;
}
Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.
Class AvroTypeUtil, method convertAvroRecordToMap.
/**
 * Converts an Avro {@link GenericRecord} into a map of values keyed by the field names
 * of the given {@link RecordSchema}.
 * <p>
 * For every field of {@code recordSchema} the value is first read from the Avro record
 * by the field's name. If that yields {@code null}, the field's aliases are tried in
 * order until one yields a non-null value. The value is then normalized against the
 * Avro schema of the field it was actually read from and converted to the data type
 * declared by the record field. If the Avro schema has no field under the name that was
 * used, {@code null} is stored for that field.
 *
 * @param avroRecord the Avro record to read values from
 * @param recordSchema the schema whose fields determine the keys and target types of the result
 * @return a map from record field name to the converted value
 */
public static Map<String, Object> convertAvroRecordToMap(final GenericRecord avroRecord, final RecordSchema recordSchema) {
    final Map<String, Object> values = new HashMap<>(recordSchema.getFieldCount());
    for (final RecordField recordField : recordSchema.getFields()) {
        final String fieldName = recordField.getFieldName();

        // Name under which the value was found in the Avro record. It starts as the
        // canonical field name and is switched to an alias only if that alias supplied
        // a non-null value.
        String avroFieldName = fieldName;
        Object value = avroRecord.get(fieldName);
        if (value == null) {
            for (final String alias : recordField.getAliases()) {
                value = avroRecord.get(alias);
                if (value != null) {
                    avroFieldName = alias;
                    break;
                }
            }
        }

        try {
            // Look the Avro field up by the name that actually matched. Looking it up by
            // the canonical name only would discard every value found through an alias.
            final Field avroField = avroRecord.getSchema().getField(avroFieldName);
            if (avroField == null) {
                values.put(fieldName, null);
                continue;
            }
            final Schema fieldSchema = avroField.schema();
            final Object rawValue = normalizeValue(value, fieldSchema, fieldName);
            final DataType desiredType = recordField.getDataType();
            final Object coercedValue = DataTypeUtils.convertType(rawValue, desiredType, fieldName);
            values.put(fieldName, coercedValue);
        } catch (Exception ex) {
            // Record which field failed before propagating the original exception unchanged.
            logger.debug("fail to convert field " + fieldName, ex);
            throw ex;
        }
    }
    return values;
}
Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.
Class AvroTypeUtil, method buildAvroSchema.
/**
 * Translates a record {@link DataType} into an Avro {@link Schema}.
 *
 * @param dataType the data type to translate
 * @param fieldName the name of the field the type belongs to; it is passed along when nested
 *                  types are translated and is used, suffixed with "Type", as the name of
 *                  generated record schemas
 * @param nullable if {@code true}, the translated schema is passed through {@code nullable(...)}
 *                 before being returned
 * @return the Avro schema for the data type, or {@code null} if the field type is not handled here
 */
private static Schema buildAvroSchema(final DataType dataType, final String fieldName, final boolean nullable) {
    final Schema avroSchema;
    switch (dataType.getFieldType()) {
        case BOOLEAN:
            avroSchema = Schema.create(Type.BOOLEAN);
            break;
        case BYTE:
        case SHORT:
        case INT:
            // BYTE and SHORT are both written as an Avro INT.
            avroSchema = Schema.create(Type.INT);
            break;
        case LONG:
            avroSchema = Schema.create(Type.LONG);
            break;
        case FLOAT:
            avroSchema = Schema.create(Type.FLOAT);
            break;
        case DOUBLE:
            avroSchema = Schema.create(Type.DOUBLE);
            break;
        case BIGINT:
        case CHAR:
        case STRING:
            // BIGINT and CHAR are both written as an Avro STRING.
            avroSchema = Schema.create(Type.STRING);
            break;
        case DATE:
            avroSchema = Schema.create(Type.INT);
            LogicalTypes.date().addToSchema(avroSchema);
            break;
        case TIME:
            avroSchema = Schema.create(Type.INT);
            LogicalTypes.timeMillis().addToSchema(avroSchema);
            break;
        case TIMESTAMP:
            avroSchema = Schema.create(Type.LONG);
            LogicalTypes.timestampMillis().addToSchema(avroSchema);
            break;
        case ARRAY: {
            final DataType elementDataType = ((ArrayDataType) dataType).getElementType();
            if (RecordFieldType.BYTE.equals(elementDataType.getFieldType())) {
                // An array of bytes becomes Avro BYTES rather than an Avro array.
                avroSchema = Schema.create(Type.BYTES);
            } else {
                avroSchema = Schema.createArray(buildAvroSchema(elementDataType, fieldName, false));
            }
            break;
        }
        case MAP: {
            final DataType valueDataType = ((MapDataType) dataType).getValueType();
            avroSchema = Schema.createMap(buildAvroSchema(valueDataType, fieldName, false));
            break;
        }
        case RECORD: {
            final RecordSchema nestedSchema = ((RecordDataType) dataType).getChildSchema();
            final List<Field> nestedAvroFields = new ArrayList<>(nestedSchema.getFieldCount());
            for (final RecordField nestedField : nestedSchema.getFields()) {
                nestedAvroFields.add(buildAvroField(nestedField));
            }
            avroSchema = Schema.createRecord(fieldName + "Type", null, "org.apache.nifi", false, nestedAvroFields);
            break;
        }
        case CHOICE: {
            final List<DataType> candidates = ((ChoiceDataType) dataType).getPossibleSubTypes();
            // A union fails to build when two members share the same Avro type, even if their
            // logical types differ (e.g. a plain int and a date, whose concrete type is also int).
            // Only the first member of each Avro type is therefore kept.
            final List<Schema> members = new ArrayList<>(candidates.size());
            final Set<Type> seenTypes = new HashSet<>();
            for (final DataType candidate : candidates) {
                final Schema candidateSchema = buildAvroSchema(candidate, fieldName, false);
                if (seenTypes.add(candidateSchema.getType())) {
                    members.add(candidateSchema);
                }
            }
            avroSchema = Schema.createUnion(members);
            break;
        }
        default:
            return null;
    }

    return nullable ? nullable(avroSchema) : avroSchema;
}
Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.
Class TestAvroTypeUtil, method testCreateAvroSchemaPrimitiveTypes.
/**
 * Round-trips a record schema through {@code AvroTypeUtil.extractAvroSchema} and
 * {@code AvroTypeUtil.createSchema} and checks the resulting data types, default value and aliases.
 */
@Test
public void testCreateAvroSchemaPrimitiveTypes() throws SchemaNotFoundException {
    // Composite types are built up front so they can be reused in the assertions below.
    final DataType arrayType = RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.STRING.getDataType());
    final DataType mapType = RecordFieldType.MAP.getMapDataType(RecordFieldType.LONG.getDataType());

    final List<RecordField> nestedFields = new ArrayList<>();
    nestedFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    nestedFields.add(new RecordField("dob", RecordFieldType.DATE.getDataType()));
    final DataType personType = RecordFieldType.RECORD.getRecordDataType(new SimpleRecordSchema(nestedFields));

    final List<RecordField> topLevelFields = new ArrayList<>();
    topLevelFields.add(new RecordField("int", RecordFieldType.INT.getDataType()));
    topLevelFields.add(new RecordField("long", RecordFieldType.LONG.getDataType()));
    topLevelFields.add(new RecordField("string", RecordFieldType.STRING.getDataType(), "hola", Collections.singleton("greeting")));
    topLevelFields.add(new RecordField("byte", RecordFieldType.BYTE.getDataType()));
    topLevelFields.add(new RecordField("char", RecordFieldType.CHAR.getDataType()));
    topLevelFields.add(new RecordField("short", RecordFieldType.SHORT.getDataType()));
    topLevelFields.add(new RecordField("double", RecordFieldType.DOUBLE.getDataType()));
    topLevelFields.add(new RecordField("float", RecordFieldType.FLOAT.getDataType()));
    topLevelFields.add(new RecordField("time", RecordFieldType.TIME.getDataType()));
    topLevelFields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
    topLevelFields.add(new RecordField("timestamp", RecordFieldType.TIMESTAMP.getDataType()));
    topLevelFields.add(new RecordField("strings", arrayType));
    topLevelFields.add(new RecordField("map", mapType));
    topLevelFields.add(new RecordField("person", personType));

    final Schema generatedAvroSchema = AvroTypeUtil.extractAvroSchema(new SimpleRecordSchema(topLevelFields));

    // Every field is nullable, so each one must be a union that includes NULL.
    final Schema nullSchema = Schema.create(Type.NULL);
    for (final Field avroField : generatedAvroSchema.getFields()) {
        final Schema avroFieldSchema = avroField.schema();
        assertEquals(Type.UNION, avroFieldSchema.getType());
        assertTrue("Field " + avroField.name() + " does not contain NULL type", avroFieldSchema.getTypes().contains(nullSchema));
    }

    // Convert back: BYTE and SHORT are expected as INT, and CHAR as STRING.
    final RecordSchema roundTripped = AvroTypeUtil.createSchema(generatedAvroSchema);
    assertEquals(RecordFieldType.INT.getDataType(), roundTripped.getDataType("int").get());
    assertEquals(RecordFieldType.LONG.getDataType(), roundTripped.getDataType("long").get());
    assertEquals(RecordFieldType.STRING.getDataType(), roundTripped.getDataType("string").get());
    assertEquals(RecordFieldType.INT.getDataType(), roundTripped.getDataType("byte").get());
    assertEquals(RecordFieldType.STRING.getDataType(), roundTripped.getDataType("char").get());
    assertEquals(RecordFieldType.INT.getDataType(), roundTripped.getDataType("short").get());
    assertEquals(RecordFieldType.DOUBLE.getDataType(), roundTripped.getDataType("double").get());
    assertEquals(RecordFieldType.FLOAT.getDataType(), roundTripped.getDataType("float").get());
    assertEquals(RecordFieldType.TIME.getDataType(), roundTripped.getDataType("time").get());
    assertEquals(RecordFieldType.DATE.getDataType(), roundTripped.getDataType("date").get());
    assertEquals(RecordFieldType.TIMESTAMP.getDataType(), roundTripped.getDataType("timestamp").get());
    assertEquals(arrayType, roundTripped.getDataType("strings").get());
    assertEquals(mapType, roundTripped.getDataType("map").get());
    assertEquals(personType, roundTripped.getDataType("person").get());

    // The default value and aliases of the "string" field must survive the round trip.
    final RecordField roundTrippedString = roundTripped.getField("string").get();
    assertEquals("hola", roundTrippedString.getDefaultValue());
    assertEquals(Collections.singleton("greeting"), roundTrippedString.getAliases());
}
Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.
Class AvroTypeUtil, method determineDataType.
/**
 * Determines the record {@link DataType} that corresponds to an Avro {@link Schema}.
 * <p>
 * A recognized logical type takes precedence over the underlying Avro type: date maps to DATE,
 * time-millis/time-micros to TIME, timestamp-millis/timestamp-micros to TIMESTAMP and decimal
 * to DOUBLE. Otherwise the Avro type decides: BYTES and FIXED become an array of BYTE, ENUM and
 * NULL become STRING, and a UNION becomes the type of its single non-null member or a CHOICE
 * of all non-null members.
 * <p>
 * Record types are cached in {@code knownRecordTypes} under their {@code namespace.name} key.
 * A record is registered before its fields are resolved, so a record that refers to itself
 * resolves to the already registered type.
 *
 * @param avroSchema the Avro schema to inspect
 * @param knownRecordTypes record types resolved so far, keyed by {@code namespace.name};
 *                         newly resolved record types are added to this map
 * @return the matching data type, or {@code null} if the Avro type is not handled
 * @throws IllegalArgumentException if {@code knownRecordTypes} is {@code null}
 */
public static DataType determineDataType(final Schema avroSchema, Map<String, DataType> knownRecordTypes) {
    if (knownRecordTypes == null) {
        throw new IllegalArgumentException("'knownRecordTypes' cannot be null.");
    }

    final Type avroType = avroSchema.getType();
    final LogicalType logicalType = avroSchema.getLogicalType();

    // Logical types win over the concrete Avro type when they are recognized.
    if (logicalType != null) {
        switch (logicalType.getName()) {
            case LOGICAL_TYPE_DATE:
                return RecordFieldType.DATE.getDataType();
            case LOGICAL_TYPE_TIME_MILLIS:
            case LOGICAL_TYPE_TIME_MICROS:
                return RecordFieldType.TIME.getDataType();
            case LOGICAL_TYPE_TIMESTAMP_MILLIS:
            case LOGICAL_TYPE_TIMESTAMP_MICROS:
                return RecordFieldType.TIMESTAMP.getDataType();
            case LOGICAL_TYPE_DECIMAL:
                // A String would also work, but a numeric type is generally preferred by users.
                return RecordFieldType.DOUBLE.getDataType();
        }
    }

    switch (avroType) {
        case BOOLEAN:
            return RecordFieldType.BOOLEAN.getDataType();
        case INT:
            return RecordFieldType.INT.getDataType();
        case LONG:
            return RecordFieldType.LONG.getDataType();
        case FLOAT:
            return RecordFieldType.FLOAT.getDataType();
        case DOUBLE:
            return RecordFieldType.DOUBLE.getDataType();
        case ENUM:
        case STRING:
        case NULL:
            return RecordFieldType.STRING.getDataType();
        case BYTES:
        case FIXED:
            return RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType());
        case ARRAY: {
            final DataType elementType = determineDataType(avroSchema.getElementType(), knownRecordTypes);
            return RecordFieldType.ARRAY.getArrayDataType(elementType);
        }
        case MAP: {
            final DataType mapValueType = determineDataType(avroSchema.getValueType(), knownRecordTypes);
            return RecordFieldType.MAP.getMapDataType(mapValueType);
        }
        case RECORD: {
            final String recordKey = avroSchema.getNamespace() + "." + avroSchema.getName();
            if (knownRecordTypes.containsKey(recordKey)) {
                return knownRecordTypes.get(recordKey);
            }

            final SimpleRecordSchema nestedSchema = new SimpleRecordSchema(avroSchema.toString(), AVRO_SCHEMA_FORMAT, SchemaIdentifier.EMPTY);
            final DataType nestedType = RecordFieldType.RECORD.getRecordDataType(nestedSchema);
            // Register before descending into the fields so recursive references find this type.
            knownRecordTypes.put(recordKey, nestedType);

            final List<Field> nestedAvroFields = avroSchema.getFields();
            final List<RecordField> nestedFields = new ArrayList<>(nestedAvroFields.size());
            for (final Field nestedAvroField : nestedAvroFields) {
                final Schema nestedFieldSchema = nestedAvroField.schema();
                final DataType nestedFieldType = determineDataType(nestedFieldSchema, knownRecordTypes);
                addFieldToList(nestedFields, nestedAvroField, nestedAvroField.name(), nestedFieldSchema, nestedFieldType, isNullable(nestedFieldSchema));
            }
            nestedSchema.setFields(nestedFields);
            return nestedType;
        }
        case UNION: {
            final List<Schema> members = getNonNullSubSchemas(avroSchema);
            if (members.size() == 1) {
                // A union with exactly one remaining member is just that member's type.
                return determineDataType(members.get(0), knownRecordTypes);
            }

            final List<DataType> choices = new ArrayList<>(members.size());
            for (final Schema member : members) {
                choices.add(determineDataType(member, knownRecordTypes));
            }
            return RecordFieldType.CHOICE.getChoiceDataType(choices);
        }
        default:
            return null;
    }
}
Aggregations