Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class SplitRecord, method onTrigger: the incoming FlowFile is read as a RecordSet and rewritten as a series of split FlowFiles, each containing at most RECORDS_PER_SPLIT records.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();

    final List<FlowFile> splits = new ArrayList<>();
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(original, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());

                    final RecordSet recordSet = reader.createRecordSet();
                    final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);

                    while (pushbackSet.isAnotherRecord()) {
                        FlowFile split = session.create(original);

                        try {
                            final Map<String, String> attributes = new HashMap<>();
                            final WriteResult writeResult;

                            try (final OutputStream out = session.write(split);
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {

                                if (maxRecords == 1) {
                                    final Record record = pushbackSet.next();
                                    writeResult = writer.write(record);
                                } else {
                                    final RecordSet limitedSet = pushbackSet.limit(maxRecords);
                                    writeResult = writer.write(limitedSet);
                                }

                                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                                attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                                attributes.putAll(writeResult.getAttributes());

                                session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
                            }

                            split = session.putAllAttributes(split, attributes);
                        } finally {
                            splits.add(split);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Failed to parse incoming data", e);
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(splits);
        session.transfer(original, REL_FAILURE);
        return;
    }

    session.transfer(original, REL_ORIGINAL);
    session.transfer(splits, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
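The loop above leans on PushBackRecordSet: isAnotherRecord() peeks ahead without consuming a record, and limit(maxRecords) exposes a view that yields at most maxRecords records per split. Below is a minimal sketch of that pattern outside a processor, assuming an in-memory RecordSet built with the RecordSet.of factory (not shown in the snippet above) and with imports omitted to match the snippet style; the schema, field names, and values are invented for illustration.

static void splitIntoChunks() throws IOException {
    // Build a tiny in-memory RecordSet: five records, each with an id and a name.
    final List<RecordField> fields = Arrays.asList(
            new RecordField("id", RecordFieldType.INT.getDataType()),
            new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final Record[] records = new Record[5];
    for (int i = 0; i < records.length; i++) {
        final Map<String, Object> values = new HashMap<>();
        values.put("id", i);
        values.put("name", "record-" + i);
        records[i] = new MapRecord(schema, values);
    }

    // Drain the set in chunks of at most two records, mirroring SplitRecord's loop.
    final PushBackRecordSet pushbackSet = new PushBackRecordSet(RecordSet.of(schema, records));
    final int maxRecords = 2;

    while (pushbackSet.isAnotherRecord()) {
        final RecordSet chunk = pushbackSet.limit(maxRecords);
        int count = 0;
        while (chunk.next() != null) {
            count++;
        }
        System.out.println("Chunk contained " + count + " records"); // prints 2, 2, 1
    }
}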
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class MergeRecord, method binFlowFile: the reader's schema is used to compute a group ID so that the FlowFile is added to a bin containing compatible records.
private void binFlowFile(final ProcessContext context, final FlowFile flowFile, final ProcessSession session, final RecordBinManager binManager, final boolean block) {
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);

    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {

        final RecordSchema schema = reader.getSchema();
        final String groupId = getGroupId(context, flowFile, schema, session);
        getLogger().debug("Got Group ID {} for {}", new Object[] { groupId, flowFile });

        binManager.add(groupId, flowFile, reader, session, block);
    } catch (MalformedRecordException | IOException | SchemaNotFoundException e) {
        throw new ProcessException(e);
    }
}
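Here the reader's RecordSchema feeds getGroupId(), so only FlowFiles whose records share a compatible schema end up in the same bin. As a rough illustration of that idea in plain Java (not MergeRecord's actual grouping logic, which also factors in processor configuration), records can be bucketed by their schema directly, assuming the schema implementation provides value-based equals/hashCode as SimpleRecordSchema does; imports are omitted to match the snippet style.

static Map<RecordSchema, List<Record>> binBySchema(final List<Record> records) {
    // Bucket records by their schema: records with identical field layouts share a bin.
    final Map<RecordSchema, List<Record>> bins = new HashMap<>();
    for (final Record record : records) {
        bins.computeIfAbsent(record.getSchema(), schema -> new ArrayList<>()).add(record);
    }
    return bins;
}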
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class UpdateRecord, method updateRecord: when the destination is the root record, a new MapRecord is built from the selected fields' schema; otherwise the destination fields are updated in place.
private Record updateRecord(final List<FieldValue> destinationFields, final List<FieldValue> selectedFields, final Record record) {
    if (destinationFields.size() == 1 && !destinationFields.get(0).getParentRecord().isPresent()) {
        final Object replacement = getReplacementObject(selectedFields);
        if (replacement == null) {
            return record;
        }
        if (replacement instanceof Record) {
            return (Record) replacement;
        }

        final List<RecordField> fields = selectedFields.stream().map(FieldValue::getField).collect(Collectors.toList());
        final RecordSchema schema = new SimpleRecordSchema(fields);

        final Record mapRecord = new MapRecord(schema, new HashMap<>());
        for (final FieldValue selectedField : selectedFields) {
            mapRecord.setValue(selectedField.getField().getFieldName(), selectedField.getValue());
        }

        return mapRecord;
    } else {
        for (final FieldValue fieldVal : destinationFields) {
            fieldVal.updateValue(getReplacementObject(selectedFields));
        }

        return record;
    }
}
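The first branch rebuilds the root record: it derives a RecordSchema from the selected fields, backs a MapRecord with an empty map, and copies each selected value in via setValue(). A minimal sketch of that construction pattern, with invented field names and values and imports omitted to match the snippet style:

static Record buildMapRecord() {
    // Derive a schema from two String fields, then populate a MapRecord against it.
    final List<RecordField> fields = Arrays.asList(
            new RecordField("firstName", RecordFieldType.STRING.getDataType()),
            new RecordField("lastName", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final Record mapRecord = new MapRecord(schema, new HashMap<>());
    mapRecord.setValue("firstName", "Ada");
    mapRecord.setValue("lastName", "Lovelace");

    System.out.println(mapRecord.getValue("firstName")); // Ada
    return mapRecord;
}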
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class ValidateRecord, method onTrigger: each record is validated against the validation schema and routed to a 'valid' or 'invalid' FlowFile, with a summary of the validation errors collected along the way.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
    final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();

    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    FlowFile validFlowFile = null;
    FlowFile invalidFlowFile = null;

    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {

        final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
        final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
        final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);

        int recordCount = 0;
        int validCount = 0;
        int invalidCount = 0;

        final Set<String> extraFields = new HashSet<>();
        final Set<String> missingFields = new HashSet<>();
        final Set<String> invalidFields = new HashSet<>();
        final Set<String> otherProblems = new HashSet<>();

        try {
            Record record;
            while ((record = reader.nextRecord(false, false)) != null) {
                final SchemaValidationResult result = validator.validate(record);
                recordCount++;

                RecordSetWriter writer;
                if (result.isValid()) {
                    validCount++;
                    if (validFlowFile == null) {
                        validFlowFile = session.create(flowFile);
                    }

                    validWriter = writer = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
                } else {
                    invalidCount++;
                    logValidationErrors(flowFile, recordCount, result);

                    if (invalidFlowFile == null) {
                        invalidFlowFile = session.create(flowFile);
                    }

                    invalidWriter = writer = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
                    // Track which fields caused problems, but only as sets of distinct field names/explanations,
                    // so the summary added to the Provenance Event does not become too noisy to be useful.
                    for (final ValidationError validationError : result.getValidationErrors()) {
                        final Optional<String> fieldName = validationError.getFieldName();

                        switch (validationError.getType()) {
                            case EXTRA_FIELD:
                                if (fieldName.isPresent()) {
                                    extraFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case MISSING_FIELD:
                                if (fieldName.isPresent()) {
                                    missingFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case INVALID_FIELD:
                                if (fieldName.isPresent()) {
                                    invalidFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case OTHER:
                                otherProblems.add(validationError.getExplanation());
                                break;
                        }
                    }
                }

                if (writer instanceof RawRecordWriter) {
                    ((RawRecordWriter) writer).writeRawRecord(record);
                } else {
                    writer.write(record);
                }
            }

            if (validWriter != null) {
                completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
            }

            if (invalidWriter != null) {
                // Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
                final StringBuilder errorBuilder = new StringBuilder();
                errorBuilder.append("Records in this FlowFile were invalid for the following reasons: ");

                if (!missingFields.isEmpty()) {
                    errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
                }

                if (!extraFields.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
                }

                if (!invalidFields.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
                }

                if (!otherProblems.isEmpty()) {
                    if (errorBuilder.length() > 0) {
                        errorBuilder.append("; ");
                    }

                    errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
                }

                final String validationErrorString = errorBuilder.toString();
                completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
            }
        } finally {
            closeQuietly(validWriter);
            closeQuietly(invalidWriter);
        }

        session.adjustCounter("Records Validated", recordCount, false);
        session.adjustCounter("Records Found Valid", validCount, false);
        session.adjustCounter("Records Found Invalid", invalidCount, false);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);

        if (validFlowFile != null) {
            session.remove(validFlowFile);
        }
        if (invalidFlowFile != null) {
            session.remove(invalidFlowFile);
        }

        return;
    }

    session.remove(flowFile);
}
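The validation itself is delegated to StandardSchemaValidator, configured through a SchemaValidationContext that carries the validation schema plus the extra-field and strict-type-checking flags. A minimal sketch of using that validator on a single record, with an invented schema and values and imports omitted to match the snippet style:

static void validateOneRecord() {
    final List<RecordField> fields = Arrays.asList(
            new RecordField("id", RecordFieldType.INT.getDataType()),
            new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    // Disallow extra fields and enforce strict type checking, as ValidateRecord can be configured to do.
    final SchemaValidationContext validationContext = new SchemaValidationContext(schema, false, true);
    final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);

    final Map<String, Object> values = new HashMap<>();
    values.put("id", "not-a-number"); // wrong type for an INT field
    values.put("name", "example");
    final Record record = new MapRecord(schema, values);

    final SchemaValidationResult result = validator.validate(record);
    if (!result.isValid()) {
        for (final ValidationError error : result.getValidationErrors()) {
            System.out.println(error.getType() + " / " + error.getFieldName().orElse("?") + ": " + error.getExplanation());
        }
    }
}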
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class ValidateRecord, method getValidationSchema: the validation schema is resolved from the reader, a schema registry lookup by name, or Avro schema text, depending on the configured Schema Access Strategy.
protected RecordSchema getValidationSchema(final ProcessContext context, final FlowFile flowFile, final RecordReader reader)
        throws MalformedRecordException, IOException, SchemaNotFoundException {

    final String schemaAccessStrategy = context.getProperty(SCHEMA_ACCESS_STRATEGY).getValue();
    if (schemaAccessStrategy.equals(READER_SCHEMA.getValue())) {
        return reader.getSchema();
    } else if (schemaAccessStrategy.equals(SCHEMA_NAME_PROPERTY.getValue())) {
        final SchemaRegistry schemaRegistry = context.getProperty(SCHEMA_REGISTRY).asControllerService(SchemaRegistry.class);
        final String schemaName = context.getProperty(SCHEMA_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final SchemaIdentifier schemaIdentifier = SchemaIdentifier.builder().name(schemaName).build();
        return schemaRegistry.retrieveSchema(schemaIdentifier);
    } else if (schemaAccessStrategy.equals(SCHEMA_TEXT_PROPERTY.getValue())) {
        final String schemaText = context.getProperty(SCHEMA_TEXT).evaluateAttributeExpressions(flowFile).getValue();
        final Parser parser = new Schema.Parser();
        final Schema avroSchema = parser.parse(schemaText);
        return AvroTypeUtil.createSchema(avroSchema);
    } else {
        throw new ProcessException("Invalid Schema Access Strategy: " + schemaAccessStrategy);
    }
}
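The last branch shows the general recipe for turning Avro schema text into a NiFi RecordSchema: parse the text with Avro's Schema.Parser and hand the result to AvroTypeUtil.createSchema. A minimal standalone sketch with a made-up schema and imports omitted to match the snippet style (Schema comes from org.apache.avro, AvroTypeUtil from NiFi's Avro utilities):

static RecordSchema recordSchemaFromAvroText() {
    final String schemaText = "{ \"type\": \"record\", \"name\": \"person\", \"fields\": ["
            + " { \"name\": \"id\", \"type\": \"int\" },"
            + " { \"name\": \"name\", \"type\": \"string\" } ] }";

    // Parse the Avro schema text, then convert it to a NiFi RecordSchema.
    final Schema avroSchema = new Schema.Parser().parse(schemaText);
    final RecordSchema recordSchema = AvroTypeUtil.createSchema(avroSchema);

    System.out.println(recordSchema.getFieldNames()); // [id, name]
    return recordSchema;
}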