use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
the class ScriptedReader method reloadScript.
/**
* Reloads the script RecordReaderFactory. This must be called within the lock.
*
* @param scriptBody An input stream associated with the script content
* @return Whether the script was successfully reloaded
*/
protected boolean reloadScript(final String scriptBody) {
// note we are starting here with a fresh listing of validation
// results since we are (re)loading a new/updated script. any
// existing validation results are not relevant
final Collection<ValidationResult> results = new HashSet<>();
try {
// get the engine and ensure its invocable
if (scriptEngine instanceof Invocable) {
final Invocable invocable = (Invocable) scriptEngine;
// Find a custom configurator and invoke their eval() method
ScriptEngineConfigurator configurator = scriptingComponentHelper.scriptEngineConfiguratorMap.get(scriptingComponentHelper.getScriptEngineName().toLowerCase());
if (configurator != null) {
configurator.eval(scriptEngine, scriptBody, scriptingComponentHelper.getModules());
} else {
// evaluate the script
scriptEngine.eval(scriptBody);
}
// get configured processor from the script (if it exists)
final Object obj = scriptEngine.get("reader");
if (obj != null) {
final ComponentLog logger = getLogger();
try {
// set the logger if the processor wants it
invocable.invokeMethod(obj, "setLogger", logger);
} catch (final NoSuchMethodException nsme) {
if (logger.isDebugEnabled()) {
logger.debug("Configured script RecordReaderFactory does not contain a setLogger method.");
}
}
if (configurationContext != null) {
try {
// set the logger if the processor wants it
invocable.invokeMethod(obj, "setConfigurationContext", configurationContext);
} catch (final NoSuchMethodException nsme) {
if (logger.isDebugEnabled()) {
logger.debug("Configured script RecordReaderFactory does not contain a setConfigurationContext method.");
}
}
}
// record the processor for use later
final RecordReaderFactory scriptedReader = invocable.getInterface(obj, RecordReaderFactory.class);
recordFactory.set(scriptedReader);
} else {
throw new ScriptException("No RecordReader was defined by the script.");
}
}
} catch (final Exception ex) {
final ComponentLog logger = getLogger();
final String message = "Unable to load script: " + ex.getLocalizedMessage();
logger.error(message, ex);
results.add(new ValidationResult.Builder().subject("ScriptValidation").valid(false).explanation("Unable to load script due to " + ex.getLocalizedMessage()).input(scriptingComponentHelper.getScriptPath()).build());
}
// store the updated validation results
validationResults.set(results);
// return whether there was any issues loading the configured script
return results.isEmpty();
}
use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
the class ValidateRecord method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();
RecordSetWriter validWriter = null;
RecordSetWriter invalidWriter = null;
FlowFile validFlowFile = null;
FlowFile invalidFlowFile = null;
try (final InputStream in = session.read(flowFile);
final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);
int recordCount = 0;
int validCount = 0;
int invalidCount = 0;
final Set<String> extraFields = new HashSet<>();
final Set<String> missingFields = new HashSet<>();
final Set<String> invalidFields = new HashSet<>();
final Set<String> otherProblems = new HashSet<>();
try {
Record record;
while ((record = reader.nextRecord(false, false)) != null) {
final SchemaValidationResult result = validator.validate(record);
recordCount++;
RecordSetWriter writer;
if (result.isValid()) {
validCount++;
if (validFlowFile == null) {
validFlowFile = session.create(flowFile);
}
validWriter = writer = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
} else {
invalidCount++;
logValidationErrors(flowFile, recordCount, result);
if (invalidFlowFile == null) {
invalidFlowFile = session.create(flowFile);
}
invalidWriter = writer = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
// that it is too noisy to be useful.
for (final ValidationError validationError : result.getValidationErrors()) {
final Optional<String> fieldName = validationError.getFieldName();
switch(validationError.getType()) {
case EXTRA_FIELD:
if (fieldName.isPresent()) {
extraFields.add(fieldName.get());
} else {
otherProblems.add(validationError.getExplanation());
}
break;
case MISSING_FIELD:
if (fieldName.isPresent()) {
missingFields.add(fieldName.get());
} else {
otherProblems.add(validationError.getExplanation());
}
break;
case INVALID_FIELD:
if (fieldName.isPresent()) {
invalidFields.add(fieldName.get());
} else {
otherProblems.add(validationError.getExplanation());
}
break;
case OTHER:
otherProblems.add(validationError.getExplanation());
break;
}
}
}
if (writer instanceof RawRecordWriter) {
((RawRecordWriter) writer).writeRawRecord(record);
} else {
writer.write(record);
}
}
if (validWriter != null) {
completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
}
if (invalidWriter != null) {
// Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
final StringBuilder errorBuilder = new StringBuilder();
errorBuilder.append("Records in this FlowFile were invalid for the following reasons: ");
if (!missingFields.isEmpty()) {
errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
}
if (!extraFields.isEmpty()) {
if (errorBuilder.length() > 0) {
errorBuilder.append("; ");
}
errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
}
if (!invalidFields.isEmpty()) {
if (errorBuilder.length() > 0) {
errorBuilder.append("; ");
}
errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
}
if (!otherProblems.isEmpty()) {
if (errorBuilder.length() > 0) {
errorBuilder.append("; ");
}
errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
}
final String validationErrorString = errorBuilder.toString();
completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
}
} finally {
closeQuietly(validWriter);
closeQuietly(invalidWriter);
}
session.adjustCounter("Records Validated", recordCount, false);
session.adjustCounter("Records Found Valid", validCount, false);
session.adjustCounter("Records Found Invalid", invalidCount, false);
} catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
if (validFlowFile != null) {
session.remove(validFlowFile);
}
if (invalidFlowFile != null) {
session.remove(invalidFlowFile);
}
return;
}
session.remove(flowFile);
}
use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
the class AbstractRecordProcessor method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final Map<String, String> attributes = new HashMap<>();
final AtomicInteger recordCount = new AtomicInteger();
final FlowFile original = flowFile;
final Map<String, String> originalAttributes = flowFile.getAttributes();
try {
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream in, final OutputStream out) throws IOException {
try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
writer.beginRecordSet();
Record record;
while ((record = reader.nextRecord()) != null) {
final Record processed = AbstractRecordProcessor.this.process(record, writeSchema, original, context);
writer.write(processed);
}
final WriteResult writeResult = writer.finishRecordSet();
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
attributes.putAll(writeResult.getAttributes());
recordCount.set(writeResult.getRecordCount());
}
} catch (final SchemaNotFoundException e) {
throw new ProcessException(e.getLocalizedMessage(), e);
} catch (final MalformedRecordException e) {
throw new ProcessException("Could not parse incoming data", e);
}
}
});
} catch (final Exception e) {
getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_SUCCESS);
final int count = recordCount.get();
session.adjustCounter("Records Processed", count, false);
getLogger().info("Successfully converted {} records for {}", new Object[] { count, flowFile });
}
use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
the class AbstractRouteRecord method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final T flowFileContext;
try {
flowFileContext = getFlowFileContext(flowFile, context);
} catch (final Exception e) {
getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final AtomicInteger numRecords = new AtomicInteger(0);
final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
final FlowFile original = flowFile;
final Map<String, String> originalAttributes = original.getAttributes();
try {
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
Record record;
while ((record = reader.nextRecord()) != null) {
final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
numRecords.incrementAndGet();
for (final Relationship relationship : relationships) {
final RecordSetWriter recordSetWriter;
Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
if (tuple == null) {
FlowFile outFlowFile = session.create(original);
final OutputStream out = session.write(outFlowFile);
recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
recordSetWriter.beginRecordSet();
tuple = new Tuple<>(outFlowFile, recordSetWriter);
writers.put(relationship, tuple);
} else {
recordSetWriter = tuple.getValue();
}
recordSetWriter.write(record);
}
}
} catch (final SchemaNotFoundException | MalformedRecordException e) {
throw new ProcessException("Could not parse incoming data", e);
}
}
});
for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
final Relationship relationship = entry.getKey();
final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
final RecordSetWriter writer = tuple.getValue();
FlowFile childFlowFile = tuple.getKey();
final WriteResult writeResult = writer.finishRecordSet();
try {
writer.close();
} catch (final IOException ioe) {
getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile });
}
final Map<String, String> attributes = new HashMap<>();
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
attributes.putAll(writeResult.getAttributes());
childFlowFile = session.putAllAttributes(childFlowFile, attributes);
session.transfer(childFlowFile, relationship);
session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);
session.getProvenanceReporter().route(childFlowFile, relationship);
}
} catch (final Exception e) {
getLogger().error("Failed to process {}", new Object[] { flowFile, e });
for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
try {
tuple.getValue().close();
} catch (final Exception e1) {
getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey() });
}
session.remove(tuple.getKey());
}
session.transfer(flowFile, REL_FAILURE);
return;
} finally {
for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
final RecordSetWriter writer = tuple.getValue();
try {
writer.close();
} catch (final Exception e) {
getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
}
}
}
if (isRouteOriginal()) {
flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
session.transfer(flowFile, REL_ORIGINAL);
} else {
session.remove(flowFile);
}
getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
the class PublishKafkaRecord_0_10 method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
if (flowFiles.isEmpty()) {
return;
}
final PublisherPool pool = getPublisherPool(context);
if (pool == null) {
context.yield();
return;
}
final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final long startTime = System.nanoTime();
try (final PublisherLease lease = pool.obtainPublisher()) {
// Send each FlowFile to Kafka asynchronously.
for (final FlowFile flowFile : flowFiles) {
if (!isScheduled()) {
// If stopped, re-queue FlowFile instead of sending it
session.transfer(flowFile);
continue;
}
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
final Map<String, String> attributes = flowFile.getAttributes();
try {
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
final RecordReader reader = readerFactory.createRecordReader(attributes, in, getLogger());
final RecordSet recordSet = reader.createRecordSet();
final RecordSchema schema = writerFactory.getSchema(attributes, recordSet.getSchema());
lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
} catch (final SchemaNotFoundException | MalformedRecordException e) {
throw new ProcessException(e);
}
}
});
} catch (final Exception e) {
// The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
lease.getTracker().fail(flowFile, e);
continue;
}
}
// Complete the send
final PublishResult publishResult = lease.complete();
// Transfer any successful FlowFiles.
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
for (FlowFile success : publishResult.getSuccessfulFlowFiles()) {
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
final int msgCount = publishResult.getSuccessfulMessageCount(success);
success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
session.adjustCounter("Messages Sent", msgCount, true);
final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
session.transfer(success, REL_SUCCESS);
}
// Transfer any failures.
for (final FlowFile failure : publishResult.getFailedFlowFiles()) {
final int successCount = publishResult.getSuccessfulMessageCount(failure);
if (successCount > 0) {
getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}", new Object[] { failure, successCount, publishResult.getReasonForFailure(failure) });
} else {
getLogger().error("Failed to send all message for {} to Kafka; routing to failure due to {}", new Object[] { failure, publishResult.getReasonForFailure(failure) });
}
session.transfer(failure, REL_FAILURE);
}
}
}
Aggregations