Search in sources :

Example 1 with ValidationError

use of org.apache.nifi.serialization.record.validation.ValidationError in project nifi by apache.

the class ValidateRecord method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
    final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();
    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    FlowFile validFlowFile = null;
    FlowFile invalidFlowFile = null;
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
        final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
        final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);
        int recordCount = 0;
        int validCount = 0;
        int invalidCount = 0;
        final Set<String> extraFields = new HashSet<>();
        final Set<String> missingFields = new HashSet<>();
        final Set<String> invalidFields = new HashSet<>();
        final Set<String> otherProblems = new HashSet<>();
        try {
            Record record;
            while ((record = reader.nextRecord(false, false)) != null) {
                final SchemaValidationResult result = validator.validate(record);
                recordCount++;
                RecordSetWriter writer;
                if (result.isValid()) {
                    validCount++;
                    if (validFlowFile == null) {
                        validFlowFile = session.create(flowFile);
                    }
                    validWriter = writer = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
                } else {
                    invalidCount++;
                    logValidationErrors(flowFile, recordCount, result);
                    if (invalidFlowFile == null) {
                        invalidFlowFile = session.create(flowFile);
                    }
                    invalidWriter = writer = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
                    // that it is too noisy to be useful.
                    for (final ValidationError validationError : result.getValidationErrors()) {
                        final Optional<String> fieldName = validationError.getFieldName();
                        switch(validationError.getType()) {
                            case EXTRA_FIELD:
                                if (fieldName.isPresent()) {
                                    extraFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case MISSING_FIELD:
                                if (fieldName.isPresent()) {
                                    missingFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case INVALID_FIELD:
                                if (fieldName.isPresent()) {
                                    invalidFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case OTHER:
                                otherProblems.add(validationError.getExplanation());
                                break;
                        }
                    }
                }
                if (writer instanceof RawRecordWriter) {
                    ((RawRecordWriter) writer).writeRawRecord(record);
                } else {
                    writer.write(record);
                }
            }
            if (validWriter != null) {
                completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
            }
            if (invalidWriter != null) {
                // Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
                final StringBuilder errorBuilder = new StringBuilder();
                errorBuilder.append("Records in this FlowFile were invalid for the following reasons: ");
                if (!missingFields.isEmpty()) {
                    errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
                }
                if (!extraFields.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
                }
                if (!invalidFields.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
                }
                if (!otherProblems.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
                }
                final String validationErrorString = errorBuilder.toString();
                completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
            }
        } finally {
            closeQuietly(validWriter);
            closeQuietly(invalidWriter);
        }
        session.adjustCounter("Records Validated", recordCount, false);
        session.adjustCounter("Records Found Valid", validCount, false);
        session.adjustCounter("Records Found Invalid", invalidCount, false);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        if (validFlowFile != null) {
            session.remove(validFlowFile);
        }
        if (invalidFlowFile != null) {
            session.remove(invalidFlowFile);
        }
        return;
    }
    session.remove(flowFile);
}
Also used : RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) StandardSchemaValidator(org.apache.nifi.schema.validation.StandardSchemaValidator) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSchemaValidator(org.apache.nifi.serialization.record.validation.RecordSchemaValidator) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RawRecordWriter(org.apache.nifi.serialization.record.RawRecordWriter) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashSet(java.util.HashSet) SchemaValidationContext(org.apache.nifi.schema.validation.SchemaValidationContext) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException)

Example 2 with ValidationError

use of org.apache.nifi.serialization.record.validation.ValidationError in project nifi by apache.

the class TestStandardSchemaValidator method testValidateWrongButCoerceableType.

@Test
public void testValidateWrongButCoerceableType() throws ParseException {
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    final Map<String, Object> valueMap = new LinkedHashMap<>();
    valueMap.put("id", 1);
    Record record = new MapRecord(schema, valueMap);
    final SchemaValidationContext strictValidationContext = new SchemaValidationContext(schema, false, true);
    final SchemaValidationContext lenientValidationContext = new SchemaValidationContext(schema, false, false);
    // Validate with correct type of int and a strict validation
    StandardSchemaValidator validator = new StandardSchemaValidator(strictValidationContext);
    SchemaValidationResult result = validator.validate(record);
    assertTrue(result.isValid());
    assertNotNull(result.getValidationErrors());
    assertTrue(result.getValidationErrors().isEmpty());
    // Validate with correct type of int and a lenient validation
    validator = new StandardSchemaValidator(lenientValidationContext);
    result = validator.validate(record);
    assertTrue(result.isValid());
    assertNotNull(result.getValidationErrors());
    assertTrue(result.getValidationErrors().isEmpty());
    // Update Map to set value to a String that is coerceable to an int
    valueMap.put("id", "1");
    record = new MapRecord(schema, valueMap);
    // Validate with incorrect type of string and a strict validation
    validator = new StandardSchemaValidator(strictValidationContext);
    result = validator.validate(record);
    assertFalse(result.isValid());
    final Collection<ValidationError> validationErrors = result.getValidationErrors();
    assertEquals(1, validationErrors.size());
    final ValidationError validationError = validationErrors.iterator().next();
    assertEquals("/id", validationError.getFieldName().get());
    // Validate with incorrect type of string and a lenient validation
    validator = new StandardSchemaValidator(lenientValidationContext);
    result = validator.validate(record);
    assertTrue(result.isValid());
    assertNotNull(result.getValidationErrors());
    assertTrue(result.getValidationErrors().isEmpty());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 3 with ValidationError

use of org.apache.nifi.serialization.record.validation.ValidationError in project nifi by apache.

the class TestStandardSchemaValidator method testInvalidArrayValue.

@Test
public void testInvalidArrayValue() {
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    fields.add(new RecordField("numbers", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.INT.getDataType())));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    final Map<String, Object> valueMap = new LinkedHashMap<>();
    valueMap.put("id", 1);
    valueMap.put("numbers", new Object[] { 1, "2", "3" });
    Record record = new MapRecord(schema, valueMap, false, false);
    final SchemaValidationContext strictValidationContext = new SchemaValidationContext(schema, false, true);
    final SchemaValidationContext lenientValidationContext = new SchemaValidationContext(schema, false, false);
    StandardSchemaValidator validator = new StandardSchemaValidator(strictValidationContext);
    SchemaValidationResult result = validator.validate(record);
    assertFalse(result.isValid());
    assertNotNull(result.getValidationErrors());
    assertEquals(1, result.getValidationErrors().size());
    final ValidationError validationError = result.getValidationErrors().iterator().next();
    assertEquals("/numbers", validationError.getFieldName().get());
    validator = new StandardSchemaValidator(lenientValidationContext);
    result = validator.validate(record);
    assertTrue(result.isValid());
    assertNotNull(result.getValidationErrors());
    assertTrue(result.getValidationErrors().isEmpty());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 4 with ValidationError

use of org.apache.nifi.serialization.record.validation.ValidationError in project nifi by apache.

the class TestStandardSchemaValidator method testMissingRequiredField.

@Test
public void testMissingRequiredField() {
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    fields.add(new RecordField("name", RecordFieldType.STRING.getDataType(), false));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    final Map<String, Object> valueMap = new LinkedHashMap<>();
    valueMap.put("id", 1);
    final Record record = new MapRecord(schema, valueMap, false, false);
    final SchemaValidationContext allowExtraFieldsContext = new SchemaValidationContext(schema, true, true);
    StandardSchemaValidator validator = new StandardSchemaValidator(allowExtraFieldsContext);
    SchemaValidationResult result = validator.validate(record);
    assertFalse(result.isValid());
    assertNotNull(result.getValidationErrors());
    final ValidationError error = result.getValidationErrors().iterator().next();
    assertEquals("/name", error.getFieldName().get());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 5 with ValidationError

use of org.apache.nifi.serialization.record.validation.ValidationError in project nifi by apache.

the class StandardSchemaValidator method verifyChildRecord.

private void verifyChildRecord(final DataType canonicalDataType, final Object rawValue, final DataType expectedDataType, final StandardSchemaValidationResult result, final RecordField field, final String fieldPrefix) {
    // Now that we have the 'canonical data type', we check if it is a Record. If so, we need to validate each sub-field.
    if (canonicalDataType.getFieldType() == RecordFieldType.RECORD) {
        if (!(rawValue instanceof Record)) {
            // sanity check
            result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD, "Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + expectedDataType));
            return;
        }
        final RecordDataType recordDataType = (RecordDataType) canonicalDataType;
        final RecordSchema childSchema = recordDataType.getChildSchema();
        final String fullChildFieldName = concat(fieldPrefix, field);
        final SchemaValidationResult childValidationResult = validate((Record) rawValue, childSchema, fullChildFieldName);
        if (childValidationResult.isValid()) {
            return;
        }
        for (final ValidationError validationError : childValidationResult.getValidationErrors()) {
            result.addValidationError(validationError);
        }
    }
}
Also used : RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Aggregations

ValidationError (org.apache.nifi.serialization.record.validation.ValidationError)8 Record (org.apache.nifi.serialization.record.Record)7 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)7 SchemaValidationResult (org.apache.nifi.serialization.record.validation.SchemaValidationResult)7 ArrayList (java.util.ArrayList)5 LinkedHashMap (java.util.LinkedHashMap)5 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)5 MapRecord (org.apache.nifi.serialization.record.MapRecord)5 RecordField (org.apache.nifi.serialization.record.RecordField)5 Test (org.junit.Test)5 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException)1 SchemaValidationContext (org.apache.nifi.schema.validation.SchemaValidationContext)1 StandardSchemaValidator (org.apache.nifi.schema.validation.StandardSchemaValidator)1 MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException)1 RecordReader (org.apache.nifi.serialization.RecordReader)1