Usage of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.
Class ValidateRecord, method onTrigger.
/**
 * Validates every record of the incoming FlowFile against the validation schema and splits the
 * records into a "valid" child FlowFile and an "invalid" child FlowFile.
 *
 * <p>Each child FlowFile is created lazily, the first time a record of that kind is seen, and is
 * handed to {@code completeFlowFile} together with {@code REL_VALID} or {@code REL_INVALID} once all
 * records have been read. For the invalid FlowFile a human-readable summary of the distinct problems
 * is passed along as well. On success the original FlowFile is removed from the session.</p>
 *
 * <p>If reading or validating fails with an {@link IOException}, {@code MalformedRecordException} or
 * {@code SchemaNotFoundException}, the original FlowFile is transferred to {@code REL_FAILURE} and any
 * child FlowFiles created so far are removed.</p>
 *
 * @param context provides the configured reader/writer factories and validation options
 * @param session the session from which the FlowFile is pulled and in which children are created
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
    final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();

    // Writers and child FlowFiles are declared out here so that the finally/catch blocks can clean them up.
    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    FlowFile validFlowFile = null;
    FlowFile invalidFlowFile = null;

    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {

        final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
        final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
        final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);

        int recordCount = 0;
        int validCount = 0;
        int invalidCount = 0;

        // Distinct problems seen across all invalid records, grouped by error type.
        final Set<String> extraFields = new HashSet<>();
        final Set<String> missingFields = new HashSet<>();
        final Set<String> invalidFields = new HashSet<>();
        final Set<String> otherProblems = new HashSet<>();

        try {
            Record record;
            // NOTE(review): the two 'false' flags presumably make the reader return the record as-is
            // (no coercion, no dropping of unknown fields) so that it can be validated - confirm against RecordReader.
            while ((record = reader.nextRecord(false, false)) != null) {
                final SchemaValidationResult result = validator.validate(record);
                recordCount++;

                final RecordSetWriter writer;
                if (result.isValid()) {
                    validCount++;
                    if (validFlowFile == null) {
                        validFlowFile = session.create(flowFile);
                    }

                    validWriter = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
                    writer = validWriter;
                } else {
                    invalidCount++;
                    logValidationErrors(flowFile, recordCount, result);

                    if (invalidFlowFile == null) {
                        invalidFlowFile = session.create(flowFile);
                    }

                    invalidWriter = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
                    writer = invalidWriter;

                    // Remember each distinct field name (or, when no field name is available, the explanation)
                    // per error type so that the problems can be summarized once all records have been read.
                    for (final ValidationError validationError : result.getValidationErrors()) {
                        final Optional<String> fieldName = validationError.getFieldName();

                        switch (validationError.getType()) {
                            case EXTRA_FIELD:
                                if (fieldName.isPresent()) {
                                    extraFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case MISSING_FIELD:
                                if (fieldName.isPresent()) {
                                    missingFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case INVALID_FIELD:
                                if (fieldName.isPresent()) {
                                    invalidFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case OTHER:
                                otherProblems.add(validationError.getExplanation());
                                break;
                        }
                    }
                }

                // Writers that implement RawRecordWriter are given the record through writeRawRecord instead of write.
                if (writer instanceof RawRecordWriter) {
                    ((RawRecordWriter) writer).writeRawRecord(record);
                } else {
                    writer.write(record);
                }
            }

            if (validWriter != null) {
                completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
            }

            if (invalidWriter != null) {
                // Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
                final StringBuilder errorBuilder = new StringBuilder();
                errorBuilder.append("Records in this FlowFile were invalid for the following reasons: ");

                // The builder is never empty once the prefix has been written, so a "; " separator is needed
                // only when something has been appended after the prefix.
                final int prefixLength = errorBuilder.length();

                if (!missingFields.isEmpty()) {
                    errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
                }

                if (!extraFields.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
                }

                if (!invalidFields.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
                }

                if (!otherProblems.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
                }

                final String validationErrorString = errorBuilder.toString();
                completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
            }
        } finally {
            // Always release the writers, whether or not processing succeeded.
            closeQuietly(validWriter);
            closeQuietly(invalidWriter);
        }

        session.adjustCounter("Records Validated", recordCount, false);
        session.adjustCounter("Records Found Valid", validCount, false);
        session.adjustCounter("Records Found Invalid", invalidCount, false);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);

        // Discard any partially written children; only the original goes to failure.
        if (validFlowFile != null) {
            session.remove(validFlowFile);
        }
        if (invalidFlowFile != null) {
            session.remove(invalidFlowFile);
        }
        return;
    }

    // All records were routed via the child FlowFiles, so the original is no longer needed.
    session.remove(flowFile);
}
Usage of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.
Class TestStandardSchemaValidator, method testMissingNullableField.
/**
 * A record that omits the "name" field (declared without an explicit nullability flag) must still
 * be reported as valid, with an empty, non-null collection of validation errors.
 */
@Test
public void testMissingNullableField() {
    // Schema declares "id" and "name"; the record below only supplies "id".
    final RecordField idField = new RecordField("id", RecordFieldType.INT.getDataType());
    final RecordField nameField = new RecordField("name", RecordFieldType.STRING.getDataType());
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(idField);
    schemaFields.add(nameField);
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record idOnlyRecord = new MapRecord(recordSchema, values, false, false);

    final SchemaValidationContext validationContext = new SchemaValidationContext(recordSchema, true, true);
    final StandardSchemaValidator schemaValidator = new StandardSchemaValidator(validationContext);
    final SchemaValidationResult validationResult = schemaValidator.validate(idOnlyRecord);

    assertTrue(validationResult.isValid());
    assertNotNull(validationResult.getValidationErrors());
    assertTrue(validationResult.getValidationErrors().isEmpty());
}
Usage of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.
Class TestStandardSchemaValidator, method testValidateWrongButCoerceableType.
/**
 * Checks how an "id" value is judged against an INT field under two validation contexts:
 * an Integer value passes with both, while the String "1" fails with the strict context
 * (one error on "/id") and passes with the lenient one.
 */
@Test
public void testValidateWrongButCoerceableType() throws ParseException {
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // The two contexts differ only in their last constructor argument.
    final SchemaValidationContext strictContext = new SchemaValidationContext(recordSchema, false, true);
    final SchemaValidationContext lenientContext = new SchemaValidationContext(recordSchema, false, false);

    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record intRecord = new MapRecord(recordSchema, values);

    // Integer value, strict context: valid.
    final SchemaValidationResult strictIntResult = new StandardSchemaValidator(strictContext).validate(intRecord);
    assertTrue(strictIntResult.isValid());
    assertNotNull(strictIntResult.getValidationErrors());
    assertTrue(strictIntResult.getValidationErrors().isEmpty());

    // Integer value, lenient context: valid.
    final SchemaValidationResult lenientIntResult = new StandardSchemaValidator(lenientContext).validate(intRecord);
    assertTrue(lenientIntResult.isValid());
    assertNotNull(lenientIntResult.getValidationErrors());
    assertTrue(lenientIntResult.getValidationErrors().isEmpty());

    // Swap the value for a String that could be coerced to an int and wrap it in a fresh record.
    values.put("id", "1");
    final Record stringRecord = new MapRecord(recordSchema, values);

    // String value, strict context: invalid, with exactly one error reported for "/id".
    final SchemaValidationResult strictStringResult = new StandardSchemaValidator(strictContext).validate(stringRecord);
    assertFalse(strictStringResult.isValid());
    final Collection<ValidationError> strictErrors = strictStringResult.getValidationErrors();
    assertEquals(1, strictErrors.size());
    final ValidationError onlyError = strictErrors.iterator().next();
    assertEquals("/id", onlyError.getFieldName().get());

    // String value, lenient context: valid.
    final SchemaValidationResult lenientStringResult = new StandardSchemaValidator(lenientContext).validate(stringRecord);
    assertTrue(lenientStringResult.isValid());
    assertNotNull(lenientStringResult.getValidationErrors());
    assertTrue(lenientStringResult.getValidationErrors().isEmpty());
}
Usage of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.
Class TestStandardSchemaValidator, method testInvalidArrayValue.
/**
 * An INT-array field populated with a mix of Integer and String elements must be rejected by
 * the strict validation context (single error on "/numbers") and accepted by the lenient one.
 */
@Test
public void testInvalidArrayValue() {
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("numbers", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.INT.getDataType())));
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // "numbers" holds one Integer followed by two Strings.
    final Object[] mixedNumbers = new Object[] { 1, "2", "3" };
    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    values.put("numbers", mixedNumbers);
    final Record mixedArrayRecord = new MapRecord(recordSchema, values, false, false);

    final SchemaValidationContext strictContext = new SchemaValidationContext(recordSchema, false, true);
    final SchemaValidationContext lenientContext = new SchemaValidationContext(recordSchema, false, false);

    // Strict context: exactly one error, reported against "/numbers".
    final StandardSchemaValidator strictValidator = new StandardSchemaValidator(strictContext);
    final SchemaValidationResult strictResult = strictValidator.validate(mixedArrayRecord);
    assertFalse(strictResult.isValid());
    assertNotNull(strictResult.getValidationErrors());
    assertEquals(1, strictResult.getValidationErrors().size());
    final ValidationError arrayError = strictResult.getValidationErrors().iterator().next();
    assertEquals("/numbers", arrayError.getFieldName().get());

    // Lenient context: the same record is valid and reports no errors.
    final StandardSchemaValidator lenientValidator = new StandardSchemaValidator(lenientContext);
    final SchemaValidationResult lenientResult = lenientValidator.validate(mixedArrayRecord);
    assertTrue(lenientResult.isValid());
    assertNotNull(lenientResult.getValidationErrors());
    assertTrue(lenientResult.getValidationErrors().isEmpty());
}
Usage of org.apache.nifi.serialization.record.validation.SchemaValidationResult in the Apache NiFi project.
Class TestStandardSchemaValidator, method testMissingRequiredField.
/**
 * A record that omits the "name" field, which is declared with an explicit {@code false} flag
 * (presumably "not nullable" - confirm against RecordField), must be reported as invalid, and
 * the first validation error must refer to "/name".
 */
@Test
public void testMissingRequiredField() {
    final RecordField idField = new RecordField("id", RecordFieldType.INT.getDataType());
    final RecordField requiredNameField = new RecordField("name", RecordFieldType.STRING.getDataType(), false);
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(idField);
    schemaFields.add(requiredNameField);
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    // Only "id" is supplied; "name" is left out on purpose.
    final Map<String, Object> values = new LinkedHashMap<>();
    values.put("id", 1);
    final Record idOnlyRecord = new MapRecord(recordSchema, values, false, false);

    final SchemaValidationContext validationContext = new SchemaValidationContext(recordSchema, true, true);
    final StandardSchemaValidator schemaValidator = new StandardSchemaValidator(validationContext);
    final SchemaValidationResult validationResult = schemaValidator.validate(idOnlyRecord);

    assertFalse(validationResult.isValid());
    assertNotNull(validationResult.getValidationErrors());
    final ValidationError firstError = validationResult.getValidationErrors().iterator().next();
    assertEquals("/name", firstError.getFieldName().get());
}
Aggregations