Search in sources:

Example 1 with SchemaValidationResult

Use of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.

From class ValidateRecord, method onTrigger.

/**
 * Validates every record of the next available FlowFile against the validation schema and splits the
 * records into a "valid" and an "invalid" child FlowFile.
 *
 * <p>Each child FlowFile (and its writer) is created lazily, the first time a record of that kind is seen.
 * Once all records have been read, the valid child is completed with {@code REL_VALID} and the invalid
 * child is completed with {@code REL_INVALID} together with a summary of why records were rejected. The
 * original FlowFile is then removed from the session.</p>
 *
 * <p>If reading, parsing, or schema lookup fails with an {@link IOException},
 * {@code MalformedRecordException}, or {@code SchemaNotFoundException}, the original FlowFile is
 * transferred to {@code REL_FAILURE} and any child FlowFiles created so far are removed.</p>
 *
 * @param context provides the configured reader/writer factories and the validation options
 * @param session the session from which the FlowFile is obtained and through which children are created
 * @throws ProcessException declared by the overridden method; not thrown directly by this body
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
    final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();
    // Writers and child FlowFiles are declared out here so that the finally/catch blocks below can clean them up.
    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    FlowFile validFlowFile = null;
    FlowFile invalidFlowFile = null;
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
        final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
        final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);
        int recordCount = 0;
        int validCount = 0;
        int invalidCount = 0;
        final Set<String> extraFields = new HashSet<>();
        final Set<String> missingFields = new HashSet<>();
        final Set<String> invalidFields = new HashSet<>();
        final Set<String> otherProblems = new HashSet<>();
        try {
            Record record;
            while ((record = reader.nextRecord(false, false)) != null) {
                final SchemaValidationResult result = validator.validate(record);
                recordCount++;
                RecordSetWriter writer;
                if (result.isValid()) {
                    validCount++;
                    if (validFlowFile == null) {
                        validFlowFile = session.create(flowFile);
                    }
                    validWriter = writer = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
                } else {
                    invalidCount++;
                    logValidationErrors(flowFile, recordCount, result);
                    if (invalidFlowFile == null) {
                        invalidFlowFile = session.create(flowFile);
                    }
                    invalidWriter = writer = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
                    // Collect the distinct field names (or, when no field name is available, the explanation)
                    // behind each validation error, grouped by error type. Using Sets collapses repeats of the
                    // same problem across records so that the summary built below does not become too noisy
                    // to be useful.
                    for (final ValidationError validationError : result.getValidationErrors()) {
                        final Optional<String> fieldName = validationError.getFieldName();
                        switch(validationError.getType()) {
                            case EXTRA_FIELD:
                                if (fieldName.isPresent()) {
                                    extraFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case MISSING_FIELD:
                                if (fieldName.isPresent()) {
                                    missingFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case INVALID_FIELD:
                                if (fieldName.isPresent()) {
                                    invalidFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case OTHER:
                                otherProblems.add(validationError.getExplanation());
                                break;
                        }
                    }
                }
                // Writers that support it receive the record as-is via writeRawRecord; all others use the regular write path.
                if (writer instanceof RawRecordWriter) {
                    ((RawRecordWriter) writer).writeRawRecord(record);
                } else {
                    writer.write(record);
                }
            }
            if (validWriter != null) {
                completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
            }
            if (invalidWriter != null) {
                // Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
                final String reasonsPrefix = "Records in this FlowFile were invalid for the following reasons: ";
                final StringBuilder errorBuilder = new StringBuilder(reasonsPrefix);
                if (!missingFields.isEmpty()) {
                    errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
                }
                // A "; " separator is only needed once at least one reason follows the prefix. Comparing
                // against the prefix length (rather than 0) avoids emitting "...reasons: ; The following ...".
                if (!extraFields.isEmpty()) {
                    if (errorBuilder.length() > reasonsPrefix.length()) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
                }
                if (!invalidFields.isEmpty()) {
                    if (errorBuilder.length() > reasonsPrefix.length()) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
                }
                if (!otherProblems.isEmpty()) {
                    if (errorBuilder.length() > reasonsPrefix.length()) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
                }
                final String validationErrorString = errorBuilder.toString();
                completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
            }
        } finally {
            // Runs on both success and failure, before the outer catch block executes.
            closeQuietly(validWriter);
            closeQuietly(invalidWriter);
        }
        session.adjustCounter("Records Validated", recordCount, false);
        session.adjustCounter("Records Found Valid", validCount, false);
        session.adjustCounter("Records Found Invalid", invalidCount, false);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        // Discard any partially written children; only the original FlowFile leaves via the failure relationship.
        if (validFlowFile != null) {
            session.remove(validFlowFile);
        }
        if (invalidFlowFile != null) {
            session.remove(invalidFlowFile);
        }
        return;
    }
    // All records were routed to the child FlowFiles, so the original is no longer needed.
    session.remove(flowFile);
}
Also used : RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) StandardSchemaValidator(org.apache.nifi.schema.validation.StandardSchemaValidator) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSchemaValidator(org.apache.nifi.serialization.record.validation.RecordSchemaValidator) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RawRecordWriter(org.apache.nifi.serialization.record.RawRecordWriter) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashSet(java.util.HashSet) SchemaValidationContext(org.apache.nifi.schema.validation.SchemaValidationContext) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException)

Example 2 with SchemaValidationResult

Use of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.

From class TestStandardSchemaValidator, method testMissingNullableField.

/**
 * Validates a record that supplies only "id" against a schema declaring both "id" and "name", and expects
 * the result to be valid with an empty (non-null) collection of validation errors.
 */
@Test
public void testMissingNullableField() {
    // Schema: "id" (INT) and "name" (STRING), both declared without an explicit nullability argument.
    final RecordField idField = new RecordField("id", RecordFieldType.INT.getDataType());
    final RecordField nameField = new RecordField("name", RecordFieldType.STRING.getDataType());
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(idField);
    schemaFields.add(nameField);
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // Only "id" is populated; "name" is deliberately left out of the record.
    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record incompleteRecord = new MapRecord(recordSchema, values, false, false);

    // Context arguments: extra fields allowed, strict type checking enabled.
    final StandardSchemaValidator schemaValidator = new StandardSchemaValidator(new SchemaValidationContext(recordSchema, true, true));
    final SchemaValidationResult validation = schemaValidator.validate(incompleteRecord);

    assertTrue(validation.isValid());
    assertNotNull(validation.getValidationErrors());
    assertTrue(validation.getValidationErrors().isEmpty());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 3 with SchemaValidationResult

Use of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.

From class TestStandardSchemaValidator, method testValidateWrongButCoerceableType.

/**
 * Checks how strict and lenient validation treat an "id" value of the declared type (int) versus a String
 * that can be coerced to an int: the int passes in both modes, while the String "1" is rejected by strict
 * validation (one error on "/id") and accepted by lenient validation.
 */
@Test
public void testValidateWrongButCoerceableType() throws ParseException {
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record intRecord = new MapRecord(recordSchema, values);

    // Neither context allows extra fields; they differ only in the strict-type-checking flag.
    final SchemaValidationContext strictContext = new SchemaValidationContext(recordSchema, false, true);
    final SchemaValidationContext lenientContext = new SchemaValidationContext(recordSchema, false, false);

    // Correct type (int), strict validation
    final StandardSchemaValidator strictForInt = new StandardSchemaValidator(strictContext);
    final SchemaValidationResult strictIntResult = strictForInt.validate(intRecord);
    assertTrue(strictIntResult.isValid());
    assertNotNull(strictIntResult.getValidationErrors());
    assertTrue(strictIntResult.getValidationErrors().isEmpty());

    // Correct type (int), lenient validation
    final StandardSchemaValidator lenientForInt = new StandardSchemaValidator(lenientContext);
    final SchemaValidationResult lenientIntResult = lenientForInt.validate(intRecord);
    assertTrue(lenientIntResult.isValid());
    assertNotNull(lenientIntResult.getValidationErrors());
    assertTrue(lenientIntResult.getValidationErrors().isEmpty());

    // Replace the value with a String that is coerceable to an int and build a fresh record from the map
    values.put("id", "1");
    final Record stringRecord = new MapRecord(recordSchema, values);

    // Incorrect type (String), strict validation: exactly one error, reported against "/id"
    final StandardSchemaValidator strictForString = new StandardSchemaValidator(strictContext);
    final SchemaValidationResult strictStringResult = strictForString.validate(stringRecord);
    assertFalse(strictStringResult.isValid());
    final Collection<ValidationError> strictErrors = strictStringResult.getValidationErrors();
    assertEquals(1, strictErrors.size());
    final ValidationError onlyError = strictErrors.iterator().next();
    assertEquals("/id", onlyError.getFieldName().get());

    // Incorrect type (String), lenient validation
    final StandardSchemaValidator lenientForString = new StandardSchemaValidator(lenientContext);
    final SchemaValidationResult lenientStringResult = lenientForString.validate(stringRecord);
    assertTrue(lenientStringResult.isValid());
    assertNotNull(lenientStringResult.getValidationErrors());
    assertTrue(lenientStringResult.getValidationErrors().isEmpty());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 4 with SchemaValidationResult

Use of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.

From class TestStandardSchemaValidator, method testInvalidArrayValue.

/**
 * Validates a record whose "numbers" field is declared as an array of INT but holds a mix of an Integer
 * and Strings. Strict validation is expected to report exactly one error on "/numbers"; lenient
 * validation is expected to accept the record with no errors.
 */
@Test
public void testInvalidArrayValue() {
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("numbers", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.INT.getDataType())));
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // The array mixes an Integer with two Strings.
    final Object[] mixedNumbers = new Object[] { 1, "2", "3" };
    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    values.put("numbers", mixedNumbers);
    final Record mixedArrayRecord = new MapRecord(recordSchema, values, false, false);

    // Neither context allows extra fields; they differ only in the strict-type-checking flag.
    final SchemaValidationContext strictContext = new SchemaValidationContext(recordSchema, false, true);
    final SchemaValidationContext lenientContext = new SchemaValidationContext(recordSchema, false, false);

    // Strict: the array field is flagged.
    final StandardSchemaValidator strictValidator = new StandardSchemaValidator(strictContext);
    final SchemaValidationResult strictResult = strictValidator.validate(mixedArrayRecord);
    assertFalse(strictResult.isValid());
    assertNotNull(strictResult.getValidationErrors());
    assertEquals(1, strictResult.getValidationErrors().size());
    final ValidationError arrayError = strictResult.getValidationErrors().iterator().next();
    assertEquals("/numbers", arrayError.getFieldName().get());

    // Lenient: the same record passes.
    final StandardSchemaValidator lenientValidator = new StandardSchemaValidator(lenientContext);
    final SchemaValidationResult lenientResult = lenientValidator.validate(mixedArrayRecord);
    assertTrue(lenientResult.isValid());
    assertNotNull(lenientResult.getValidationErrors());
    assertTrue(lenientResult.getValidationErrors().isEmpty());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 5 with SchemaValidationResult

Use of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.

From class TestStandardSchemaValidator, method testMissingRequiredField.

/**
 * Validates a record that supplies only "id" against a schema whose "name" field is declared with a
 * {@code false} third constructor argument, and expects validation to fail with the first reported
 * error pointing at "/name".
 */
@Test
public void testMissingRequiredField() {
    final RecordField idField = new RecordField("id", RecordFieldType.INT.getDataType());
    // NOTE(review): the trailing 'false' presumably marks "name" as non-nullable (i.e. required) - confirm against RecordField.
    final RecordField requiredNameField = new RecordField("name", RecordFieldType.STRING.getDataType(), false);
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(idField);
    schemaFields.add(requiredNameField);
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // Only "id" is populated; "name" is deliberately left out of the record.
    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record incompleteRecord = new MapRecord(recordSchema, values, false, false);

    // Context arguments: extra fields allowed, strict type checking enabled.
    final StandardSchemaValidator schemaValidator = new StandardSchemaValidator(new SchemaValidationContext(recordSchema, true, true));
    final SchemaValidationResult validation = schemaValidator.validate(incompleteRecord);

    assertFalse(validation.isValid());
    assertNotNull(validation.getValidationErrors());
    final ValidationError firstError = validation.getValidationErrors().iterator().next();
    assertEquals("/name", firstError.getFieldName().get());
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Aggregations

Record (org.apache.nifi.serialization.record.Record): 9; RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 9; SchemaValidationResult (org.apache.nifi.serialization.record.validation.SchemaValidationResult): 9; ArrayList (java.util.ArrayList): 7; LinkedHashMap (java.util.LinkedHashMap): 7; SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema): 7; MapRecord (org.apache.nifi.serialization.record.MapRecord): 7; RecordField (org.apache.nifi.serialization.record.RecordField): 7; ValidationError (org.apache.nifi.serialization.record.validation.ValidationError): 7; Test (org.junit.Test): 7; IOException (java.io.IOException): 1; InputStream (java.io.InputStream): 1; Date (java.sql.Date): 1; Time (java.sql.Time): 1; Timestamp (java.sql.Timestamp): 1; DateFormat (java.text.DateFormat): 1; SimpleDateFormat (java.text.SimpleDateFormat): 1; HashMap (java.util.HashMap): 1; HashSet (java.util.HashSet): 1; FlowFile (org.apache.nifi.flowfile.FlowFile): 1