Search in sources :

Example 6 with RecordDataType

use of org.apache.nifi.serialization.record.type.RecordDataType in project nifi by apache.

the class AvroTypeUtil method buildAvroSchema.

/**
 * Converts a record {@link DataType} into the Avro {@link Schema} used to represent it.
 *
 * @param dataType  the data type to convert; its field type selects the Avro type that is produced
 * @param fieldName name of the field being converted; nested record schemas are named
 *                  {@code fieldName + "Type"}, and the name is passed down unchanged to element,
 *                  map-value and choice-option conversions
 * @param nullable  when {@code true} the resulting schema is passed through {@code nullable(Schema)}
 *                  before being returned
 * @return the Avro schema, or {@code null} if the field type is not one of the cases handled here
 */
private static Schema buildAvroSchema(final DataType dataType, final String fieldName, final boolean nullable) {
    final Schema schema;
    switch (dataType.getFieldType()) {
        // Field types that are written as an Avro string.
        case BIGINT:
        case CHAR:
        case STRING:
            schema = Schema.create(Type.STRING);
            break;
        // Field types that are written as a plain Avro int.
        case BYTE:
        case SHORT:
        case INT:
            schema = Schema.create(Type.INT);
            break;
        case LONG:
            schema = Schema.create(Type.LONG);
            break;
        case FLOAT:
            schema = Schema.create(Type.FLOAT);
            break;
        case DOUBLE:
            schema = Schema.create(Type.DOUBLE);
            break;
        case BOOLEAN:
            schema = Schema.create(Type.BOOLEAN);
            break;
        // Temporal types: a primitive schema annotated with the matching logical type.
        case DATE:
            schema = Schema.create(Type.INT);
            LogicalTypes.date().addToSchema(schema);
            break;
        case TIME:
            schema = Schema.create(Type.INT);
            LogicalTypes.timeMillis().addToSchema(schema);
            break;
        case TIMESTAMP:
            schema = Schema.create(Type.LONG);
            LogicalTypes.timestampMillis().addToSchema(schema);
            break;
        case ARRAY: {
            final DataType elementDataType = ((ArrayDataType) dataType).getElementType();
            // An array whose elements are bytes becomes Avro 'bytes' instead of an Avro array.
            schema = RecordFieldType.BYTE.equals(elementDataType.getFieldType())
                    ? Schema.create(Type.BYTES)
                    : Schema.createArray(buildAvroSchema(elementDataType, fieldName, false));
            break;
        }
        case MAP: {
            final DataType valueDataType = ((MapDataType) dataType).getValueType();
            schema = Schema.createMap(buildAvroSchema(valueDataType, fieldName, false));
            break;
        }
        case CHOICE: {
            final List<DataType> subTypes = ((ChoiceDataType) dataType).getPossibleSubTypes();
            // Creating a union fails when two members share the same Avro Type, even if their
            // logical types differ (e.g. a plain int and a date, which is also backed by an int).
            // Only the first option seen for each Avro Type is therefore kept.
            final List<Schema> unionMembers = new ArrayList<>(subTypes.size());
            final Set<Type> seenTypes = new HashSet<>();
            for (final DataType subType : subTypes) {
                final Schema memberSchema = buildAvroSchema(subType, fieldName, false);
                if (seenTypes.add(memberSchema.getType())) {
                    unionMembers.add(memberSchema);
                }
            }
            schema = Schema.createUnion(unionMembers);
            break;
        }
        case RECORD: {
            final RecordSchema nestedSchema = ((RecordDataType) dataType).getChildSchema();
            final List<Field> avroFields = new ArrayList<>(nestedSchema.getFieldCount());
            for (final RecordField nestedField : nestedSchema.getFields()) {
                avroFields.add(buildAvroField(nestedField));
            }
            schema = Schema.createRecord(fieldName + "Type", null, "org.apache.nifi", false, avroFields);
            break;
        }
        default:
            return null;
    }
    return nullable ? nullable(schema) : schema;
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ArrayList(java.util.ArrayList) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) Field(org.apache.avro.Schema.Field) RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) Type(org.apache.avro.Schema.Type) LogicalType(org.apache.avro.LogicalType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) HashSet(java.util.HashSet)

Example 7 with RecordDataType

use of org.apache.nifi.serialization.record.type.RecordDataType in project nifi by apache.

the class TestAvroTypeUtil method testDefaultArrayValuesInRecordsCase2.

/**
 * Checks that a non-empty default value declared for an array field nested inside a
 * record is preserved, both when building an Avro record from the schema and when the
 * Avro schema is converted to a {@link RecordSchema}. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 *
 * @throws IOException
 *             if the schema resource cannot be read or closed.
 */
@Test
public void testDefaultArrayValuesInRecordsCase2() throws IOException {
    final Schema avroSchema;
    // Close the resource stream explicitly rather than relying on the parser to release it.
    try (final java.io.InputStream schemaStream = getClass().getResourceAsStream("defaultArrayInRecords2.json")) {
        avroSchema = new Schema.Parser().parse(schemaStream);
    }

    // Build an Avro record using only defaults and check the nested array default on the Avro side.
    GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
    Record field1Record = new GenericRecordBuilder(avroSchema.getField("field1").schema()).build();
    builder.set("field1", field1Record);
    Record r = builder.build();
    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> values = (GenericData.Array<Integer>) ((GenericRecord) r.get("field1")).get("listOfInt");
    assertArrayEquals(new Object[] { 1, 2, 3 }, values.toArray());

    // Convert the schema and check that the same default is exposed on the nested array field.
    RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
    RecordField field = record.getField("field1").get();
    assertEquals(RecordFieldType.RECORD, field.getDataType().getFieldType());
    RecordDataType data = (RecordDataType) field.getDataType();
    RecordSchema childSchema = data.getChildSchema();
    RecordField childField = childSchema.getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, childField.getDataType().getFieldType());
    assertTrue(childField.getDefaultValue() instanceof Object[]);
    assertArrayEquals(new Object[] { 1, 2, 3 }, ((Object[]) childField.getDefaultValue()));
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) GenericData(org.apache.avro.generic.GenericData) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 8 with RecordDataType

use of org.apache.nifi.serialization.record.type.RecordDataType in project nifi by apache.

the class TestAvroTypeUtil method testComplicatedRecursiveSchema.

/**
 * Converts an Avro schema in which Record_A contains a Record_B child whose 'parent'
 * field refers back to Record_A, and checks the fields of both resulting record schemas.
 */
@Test
public void testComplicatedRecursiveSchema() {
    final String schemaJson = "{\n"
            + "  \"namespace\": \"org.apache.nifi.testing\",\n"
            + "  \"name\": \"Record_A\",\n"
            + "  \"type\": \"record\",\n"
            + "  \"fields\": [\n"
            + "    {\n"
            + "      \"name\": \"id\",\n"
            + "      \"type\": \"int\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\": \"value\",\n"
            + "      \"type\": \"string\"\n"
            + "    },\n"
            + "    {\n"
            + "      \"name\": \"child\",\n"
            + "      \"type\": {\n"
            + "        \"namespace\": \"org.apache.nifi.testing\",\n"
            + "        \"name\": \"Record_B\",\n"
            + "        \"type\": \"record\",\n"
            + "        \"fields\": [\n"
            + "          {\n"
            + "            \"name\": \"id\",\n"
            + "            \"type\": \"int\"\n"
            + "          },\n"
            + "          {\n"
            + "            \"name\": \"value\",\n"
            + "            \"type\": \"string\"\n"
            + "          },\n"
            + "          {\n"
            + "            \"name\": \"parent\",\n"
            + "            \"type\": [\n"
            + "              \"null\",\n"
            + "              \"Record_A\"\n"
            + "            ]\n"
            + "          }\n"
            + "        ]\n"
            + "      }\n"
            + "    }\n"
            + "  ]\n"
            + "}\n";
    final Schema avroSchema = new Schema.Parser().parse(schemaJson);

    // The conversion itself must complete without throwing.
    final RecordSchema schemaA = AvroTypeUtil.createSchema(avroSchema);

    // Record_A: id (int), value (string), child (record).
    Assert.assertEquals(3, schemaA.getFieldCount());

    final Optional<RecordField> idOfA = schemaA.getField("id");
    Assert.assertTrue(idOfA.isPresent());
    Assert.assertEquals(RecordFieldType.INT, idOfA.get().getDataType().getFieldType());

    final Optional<RecordField> valueOfA = schemaA.getField("value");
    Assert.assertTrue(valueOfA.isPresent());
    Assert.assertEquals(RecordFieldType.STRING, valueOfA.get().getDataType().getFieldType());

    final Optional<RecordField> childOfA = schemaA.getField("child");
    Assert.assertTrue(childOfA.isPresent());
    Assert.assertEquals(RecordFieldType.RECORD, childOfA.get().getDataType().getFieldType());

    // Record_B, reached through the 'child' field: id (int), value (string), parent (record).
    final RecordDataType childType = (RecordDataType) childOfA.get().getDataType();
    final RecordSchema schemaB = childType.getChildSchema();
    Assert.assertEquals(3, schemaB.getFieldCount());

    final Optional<RecordField> idOfB = schemaB.getField("id");
    Assert.assertTrue(idOfB.isPresent());
    Assert.assertEquals(RecordFieldType.INT, idOfB.get().getDataType().getFieldType());

    final Optional<RecordField> valueOfB = schemaB.getField("value");
    Assert.assertTrue(valueOfB.isPresent());
    Assert.assertEquals(RecordFieldType.STRING, valueOfB.get().getDataType().getFieldType());

    final Optional<RecordField> parentOfB = schemaB.getField("parent");
    Assert.assertTrue(parentOfB.isPresent());
    Assert.assertEquals(RecordFieldType.RECORD, parentOfB.get().getDataType().getFieldType());

    // The 'parent' field's schema must be equal to the top-level Record_A schema.
    final RecordDataType parentType = (RecordDataType) parentOfB.get().getDataType();
    Assert.assertEquals(schemaA, parentType.getChildSchema());
}
Also used : RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 9 with RecordDataType

use of org.apache.nifi.serialization.record.type.RecordDataType in project nifi by apache.

the class TestAvroTypeUtil method testDefaultArrayValuesInRecordsCase1.

/**
 * Checks that an empty default value declared for an array field nested inside a
 * record is preserved, both when building an Avro record from the schema and when the
 * Avro schema is converted to a {@link RecordSchema}. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 *
 * @throws IOException
 *             if the schema resource cannot be read or closed.
 */
@Test
public void testDefaultArrayValuesInRecordsCase1() throws IOException {
    final Schema avroSchema;
    // Close the resource stream explicitly rather than relying on the parser to release it.
    try (final java.io.InputStream schemaStream = getClass().getResourceAsStream("defaultArrayInRecords1.json")) {
        avroSchema = new Schema.Parser().parse(schemaStream);
    }

    // Build an Avro record using only defaults and check the nested array default on the Avro side.
    GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
    Record field1Record = new GenericRecordBuilder(avroSchema.getField("field1").schema()).build();
    builder.set("field1", field1Record);
    Record r = builder.build();
    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> values = (GenericData.Array<Integer>) ((GenericRecord) r.get("field1")).get("listOfInt");
    assertArrayEquals(new Object[] {}, values.toArray());

    // Convert the schema and check that the same (empty) default is exposed on the nested array field.
    RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
    RecordField field = record.getField("field1").get();
    assertEquals(RecordFieldType.RECORD, field.getDataType().getFieldType());
    RecordDataType data = (RecordDataType) field.getDataType();
    RecordSchema childSchema = data.getChildSchema();
    RecordField childField = childSchema.getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, childField.getDataType().getFieldType());
    assertTrue(childField.getDefaultValue() instanceof Object[]);
    assertArrayEquals(new Object[] {}, ((Object[]) childField.getDefaultValue()));
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) GenericData(org.apache.avro.generic.GenericData) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 10 with RecordDataType

use of org.apache.nifi.serialization.record.type.RecordDataType in project nifi by apache.

the class StandardSchemaValidator method verifyChildRecord.

/**
 * Validates a value whose canonical data type is RECORD against the child schema of that
 * type, copying any validation errors found into {@code result}. Does nothing when the
 * canonical data type is not a RECORD.
 *
 * @param canonicalDataType the resolved data type of the value; only RECORD is acted upon
 * @param rawValue          the value to validate; anything that is not a {@link Record}
 *                          (including {@code null}) is reported as an invalid field
 * @param expectedDataType  the data type named in the error message when the value is not a Record
 * @param result            the validation result that collects errors
 * @param field             the field the value belongs to; combined with {@code fieldPrefix}
 *                          via {@code concat} to name the field in errors
 * @param fieldPrefix       prefix passed to {@code concat} together with {@code field}
 */
private void verifyChildRecord(final DataType canonicalDataType, final Object rawValue, final DataType expectedDataType, final StandardSchemaValidationResult result, final RecordField field, final String fieldPrefix) {
    // Only Record-typed values have sub-fields that need validating.
    if (canonicalDataType.getFieldType() != RecordFieldType.RECORD) {
        return;
    }

    if (!(rawValue instanceof Record)) {
        // sanity check. A null value also lands here (null is never an instance of Record),
        // so the type description must not dereference rawValue unconditionally.
        final String actualType = rawValue == null ? "null" : rawValue.getClass().getName();
        result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
            "Value is of type " + actualType + " but was expected to be of type " + expectedDataType));
        return;
    }

    final RecordDataType recordDataType = (RecordDataType) canonicalDataType;
    final RecordSchema childSchema = recordDataType.getChildSchema();
    final String fullChildFieldName = concat(fieldPrefix, field);
    final SchemaValidationResult childValidationResult = validate((Record) rawValue, childSchema, fullChildFieldName);
    if (childValidationResult.isValid()) {
        return;
    }

    // Surface every error found in the child record on the parent result.
    for (final ValidationError validationError : childValidationResult.getValidationErrors()) {
        result.addValidationError(validationError);
    }
}
Also used : RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Aggregations

RecordSchema (org.apache.nifi.serialization.record.RecordSchema)12 RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType)12 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)9 DataType (org.apache.nifi.serialization.record.DataType)8 RecordField (org.apache.nifi.serialization.record.RecordField)8 ArrayDataType (org.apache.nifi.serialization.record.type.ArrayDataType)8 Map (java.util.Map)5 ChoiceDataType (org.apache.nifi.serialization.record.type.ChoiceDataType)5 MapDataType (org.apache.nifi.serialization.record.type.MapDataType)5 HashMap (java.util.HashMap)4 Schema (org.apache.avro.Schema)4 MapRecord (org.apache.nifi.serialization.record.MapRecord)4 Record (org.apache.nifi.serialization.record.Record)4 LinkedHashMap (java.util.LinkedHashMap)3 Test (org.junit.Test)3 List (java.util.List)2 GenericData (org.apache.avro.generic.GenericData)2 Record (org.apache.avro.generic.GenericData.Record)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)2