Search in sources :

Example 6 with SchemaNotFoundException

use of org.apache.nifi.schema.access.SchemaNotFoundException in project nifi by apache.

the class MergeRecord method binFlowFile.

/**
 * Opens the given FlowFile with the configured Record Reader, determines the group ID
 * for it from its record schema, and offers the FlowFile together with its open reader
 * to the bin manager.
 *
 * @param context    process context used to look up the Record Reader service
 * @param flowFile   the FlowFile to place into a bin
 * @param session    the session the FlowFile currently belongs to
 * @param binManager the manager that receives the FlowFile
 * @param block      passed through unchanged to {@code binManager.add}
 * @throws ProcessException wrapping any I/O, parsing or schema-lookup failure
 */
private void binFlowFile(final ProcessContext context, final FlowFile flowFile, final ProcessSession session, final RecordBinManager binManager, final boolean block) {
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);

    // Both the content stream and the reader are closed automatically once the bin manager has consumed them.
    try (final InputStream content = session.read(flowFile);
        final RecordReader recordReader = recordReaderFactory.createRecordReader(flowFile, content, getLogger())) {

        final String binGroupId = getGroupId(context, flowFile, recordReader.getSchema(), session);
        getLogger().debug("Got Group ID {} for {}", new Object[] { binGroupId, flowFile });

        binManager.add(binGroupId, flowFile, recordReader, session, block);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException cause) {
        throw new ProcessException(cause);
    }
}
Also used : ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) IOException(java.io.IOException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException)

Example 7 with SchemaNotFoundException

use of org.apache.nifi.schema.access.SchemaNotFoundException in project nifi by apache.

the class ValidateRecord method onTrigger.

/**
 * Reads every record of the incoming FlowFile, validates it against the validation schema and
 * writes it to either a "valid" or an "invalid" child FlowFile. The child FlowFiles are created
 * lazily, only when the first record of the respective kind is seen.
 * <p>
 * For invalid records the distinct field names / explanations are collected per error category
 * and turned into a single human-readable summary that is handed to {@code completeFlowFile}
 * for the invalid FlowFile.
 * <p>
 * On success the original FlowFile is removed from the session. If reading, parsing or schema
 * lookup fails, the original FlowFile is routed to {@code REL_FAILURE} and any child FlowFiles
 * created so far are removed.
 *
 * @param context the process context supplying the reader/writer services and validation options
 * @param session the session used to read the input and create the child FlowFiles
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean allowExtraFields = context.getProperty(ALLOW_EXTRA_FIELDS).asBoolean();
    final boolean strictTypeChecking = context.getProperty(STRICT_TYPE_CHECKING).asBoolean();
    // Writers and child FlowFiles are declared out here so the outer catch block can clean them up.
    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    FlowFile validFlowFile = null;
    FlowFile invalidFlowFile = null;
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        final RecordSchema validationSchema = getValidationSchema(context, flowFile, reader);
        final SchemaValidationContext validationContext = new SchemaValidationContext(validationSchema, allowExtraFields, strictTypeChecking);
        final RecordSchemaValidator validator = new StandardSchemaValidator(validationContext);
        int recordCount = 0;
        int validCount = 0;
        int invalidCount = 0;
        final Set<String> extraFields = new HashSet<>();
        final Set<String> missingFields = new HashSet<>();
        final Set<String> invalidFields = new HashSet<>();
        final Set<String> otherProblems = new HashSet<>();
        try {
            Record record;
            while ((record = reader.nextRecord(false, false)) != null) {
                final SchemaValidationResult result = validator.validate(record);
                recordCount++;
                RecordSetWriter writer;
                if (result.isValid()) {
                    validCount++;
                    if (validFlowFile == null) {
                        validFlowFile = session.create(flowFile);
                    }
                    validWriter = writer = createIfNecessary(validWriter, writerFactory, session, validFlowFile, record.getSchema());
                } else {
                    invalidCount++;
                    logValidationErrors(flowFile, recordCount, result);
                    if (invalidFlowFile == null) {
                        invalidFlowFile = session.create(flowFile);
                    }
                    invalidWriter = writer = createIfNecessary(invalidWriter, writerFactory, session, invalidFlowFile, record.getSchema());
                    // Collect the validation errors per category. Sets are used so that a problem that
                    // repeats across many records appears only once in the summary built below.
                    for (final ValidationError validationError : result.getValidationErrors()) {
                        final Optional<String> fieldName = validationError.getFieldName();
                        switch(validationError.getType()) {
                            case EXTRA_FIELD:
                                if (fieldName.isPresent()) {
                                    extraFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case MISSING_FIELD:
                                if (fieldName.isPresent()) {
                                    missingFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case INVALID_FIELD:
                                if (fieldName.isPresent()) {
                                    invalidFields.add(fieldName.get());
                                } else {
                                    otherProblems.add(validationError.getExplanation());
                                }
                                break;
                            case OTHER:
                                otherProblems.add(validationError.getExplanation());
                                break;
                        }
                    }
                }
                if (writer instanceof RawRecordWriter) {
                    ((RawRecordWriter) writer).writeRawRecord(record);
                } else {
                    writer.write(record);
                }
            }
            if (validWriter != null) {
                completeFlowFile(session, validFlowFile, validWriter, REL_VALID, null);
            }
            if (invalidWriter != null) {
                // Build up a String that explains why the records were invalid, so that we can add this to the Provenance Event.
                final StringBuilder errorBuilder = new StringBuilder();
                errorBuilder.append("Records in this FlowFile were invalid for the following reasons: ");
                // Remember where the fixed prefix ends. The "; " separator belongs only between two reasons,
                // never directly after the prefix, so "has a reason been written yet" must be measured
                // against this length rather than against zero.
                final int prefixLength = errorBuilder.length();
                if (!missingFields.isEmpty()) {
                    errorBuilder.append("The following ").append(missingFields.size()).append(" fields were missing: ").append(missingFields.toString());
                }
                if (!extraFields.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(extraFields.size()).append(" fields were present in the Record but not in the schema: ").append(extraFields.toString());
                }
                if (!invalidFields.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(invalidFields.size()).append(" fields had values whose type did not match the schema: ").append(invalidFields.toString());
                }
                if (!otherProblems.isEmpty()) {
                    if (errorBuilder.length() > prefixLength) {
                        errorBuilder.append("; ");
                    }
                    errorBuilder.append("The following ").append(otherProblems.size()).append(" additional problems were encountered: ").append(otherProblems.toString());
                }
                final String validationErrorString = errorBuilder.toString();
                completeFlowFile(session, invalidFlowFile, invalidWriter, REL_INVALID, validationErrorString);
            }
        } finally {
            // Always release both writers, whether or not the loop above completed normally.
            closeQuietly(validWriter);
            closeQuietly(invalidWriter);
        }
        session.adjustCounter("Records Validated", recordCount, false);
        session.adjustCounter("Records Found Valid", validCount, false);
        session.adjustCounter("Records Found Invalid", invalidCount, false);
    } catch (final IOException | MalformedRecordException | SchemaNotFoundException e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        // Discard any partially written child FlowFiles; only the original is routed onward.
        if (validFlowFile != null) {
            session.remove(validFlowFile);
        }
        if (invalidFlowFile != null) {
            session.remove(invalidFlowFile);
        }
        return;
    }
    // All records were copied into the child FlowFiles, so the original is no longer needed.
    session.remove(flowFile);
}
Also used : RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) StandardSchemaValidator(org.apache.nifi.schema.validation.StandardSchemaValidator) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSchemaValidator(org.apache.nifi.serialization.record.validation.RecordSchemaValidator) SchemaValidationResult(org.apache.nifi.serialization.record.validation.SchemaValidationResult) Record(org.apache.nifi.serialization.record.Record) ValidationError(org.apache.nifi.serialization.record.validation.ValidationError) RawRecordWriter(org.apache.nifi.serialization.record.RawRecordWriter) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashSet(java.util.HashSet) SchemaValidationContext(org.apache.nifi.schema.validation.SchemaValidationContext) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException)

Example 8 with SchemaNotFoundException

use of org.apache.nifi.schema.access.SchemaNotFoundException in project nifi by apache.

the class RecordBin method offer.

/**
 * Attempts to add all records of the given FlowFile to this bin.
 * <p>
 * Returns {@code false} without consuming anything when the bin is already complete, or when
 * {@code block} is {@code false} and the write lock could not be obtained immediately.
 * Otherwise every record is read from {@code recordReader} and written to the bin's record
 * writer (created lazily on the first record), the reader is closed, the FlowFile is migrated
 * from {@code flowFileSession} into this bin's session, and the bin is completed if it has
 * become full or is older than the configured maximum bin age.
 * <p>
 * If anything in that sequence throws, the failure is logged, the reader/writer/output stream
 * are closed, the bin is marked complete, the merged FlowFile is removed, all FlowFiles held by
 * the bin are transferred to {@code MergeRecord.REL_FAILURE}, the bin's session is committed,
 * and {@code true} is returned (the FlowFile has been taken over by this bin either way).
 *
 * @param flowFile        the FlowFile whose records are being offered
 * @param recordReader    reader positioned on the FlowFile's records; closed by this method once consumed
 * @param flowFileSession the session that currently owns {@code flowFile}
 * @param block           {@code true} to wait for the write lock, {@code false} to give up if it is not free
 * @return {@code true} if the FlowFile was accepted by this bin (including the failure path),
 *         {@code false} if the bin was complete or the lock was not acquired
 * @throws IOException              declared; presumably reachable only from the cleanup calls in the catch block,
 *                                  since the main body catches {@code Exception} - TODO confirm
 * @throws MalformedRecordException declared; exceptions thrown while reading records are caught and handled here
 * @throws SchemaNotFoundException  declared; exceptions thrown while creating the writer are caught and handled here
 */
public boolean offer(final FlowFile flowFile, final RecordReader recordReader, final ProcessSession flowFileSession, final boolean block) throws IOException, MalformedRecordException, SchemaNotFoundException {
    // Cheap check before taking the lock; repeated below once the lock is held.
    if (isComplete()) {
        logger.debug("RecordBin.offer for id={} returning false because {} is complete", new Object[] { flowFile.getId(), this });
        return false;
    }
    final boolean locked;
    if (block) {
        writeLock.lock();
        locked = true;
    } else {
        locked = writeLock.tryLock();
    }
    if (!locked) {
        logger.debug("RecordBin.offer for id={} returning false because failed to get lock for {}", new Object[] { flowFile.getId(), this });
        return false;
    }
    // Tracks whether the FlowFile already belongs to this bin's session, so the failure path does not migrate it twice.
    boolean flowFileMigrated = false;
    try {
        // Re-check under the lock: the bin may have been completed while we were waiting for it.
        if (isComplete()) {
            logger.debug("RecordBin.offer for id={} returning false because {} is complete", new Object[] { flowFile.getId(), this });
            return false;
        }
        logger.debug("Migrating id={} to {}", new Object[] { flowFile.getId(), this });
        Record record;
        while ((record = recordReader.nextRecord()) != null) {
            // Lazily open the merged output on the first record; its schema is used to create the writer.
            if (recordWriter == null) {
                final OutputStream rawOut = session.write(merged);
                logger.debug("Created OutputStream using session {} for {}", new Object[] { session, this });
                this.out = new ByteCountingOutputStream(rawOut);
                recordWriter = writerFactory.createWriter(logger, record.getSchema(), out);
                recordWriter.beginRecordSet();
            }
            recordWriter.write(record);
            recordCount++;
        }
        // This will be closed by the MergeRecord class anyway but we have to close it
        // here because it needs to be closed before we are able to migrate the FlowFile
        // to a new Session.
        recordReader.close();
        flowFileSession.migrate(this.session, Collections.singleton(flowFile));
        flowFileMigrated = true;
        this.flowFiles.add(flowFile);
        if (isFull()) {
            logger.debug(this + " is now full. Completing bin.");
            complete("Bin is full");
        } else if (isOlderThan(thresholds.getMaxBinMillis(), TimeUnit.MILLISECONDS)) {
            logger.debug(this + " is now expired. Completing bin.");
            complete("Bin is older than " + thresholds.getMaxBinAge());
        }
        return true;
    } catch (final Exception e) {
        // NOTE(review): the "+ 1" accounts for the FlowFile being offered; if the failure happened after
        // flowFiles.add(flowFile) above (e.g. inside complete(...)), the logged count looks one too high - confirm.
        logger.error("Failed to create merged FlowFile from " + (flowFiles.size() + 1) + " input FlowFiles; routing originals to failure", e);
        try {
            // This will be closed by the MergeRecord class anyway but we have to close it
            // here because it needs to be closed before we are able to migrate the FlowFile
            // to a new Session.
            recordReader.close();
            if (recordWriter != null) {
                recordWriter.close();
            }
            if (this.out != null) {
                this.out.close();
            }
            // Take ownership of the offered FlowFile so it is routed to failure together with the rest of the bin.
            if (!flowFileMigrated) {
                flowFileSession.migrate(this.session, Collections.singleton(flowFile));
                this.flowFiles.add(flowFile);
            }
        } finally {
            // Runs even if the cleanup above throws: mark the bin complete, drop the merged FlowFile
            // and route the FlowFiles held by the bin to failure.
            complete = true;
            session.remove(merged);
            session.transfer(flowFiles, MergeRecord.REL_FAILURE);
            session.commit();
        }
        return true;
    } finally {
        // Reached on every path after the lock was acquired, including the early "complete" return.
        writeLock.unlock();
    }
}
Also used : ByteCountingOutputStream(org.apache.nifi.stream.io.ByteCountingOutputStream) OutputStream(java.io.OutputStream) Record(org.apache.nifi.serialization.record.Record) MergeRecord(org.apache.nifi.processors.standard.MergeRecord) ByteCountingOutputStream(org.apache.nifi.stream.io.ByteCountingOutputStream) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException)

Example 9 with SchemaNotFoundException

use of org.apache.nifi.schema.access.SchemaNotFoundException in project nifi by apache.

the class AbstractRecordProcessor method onTrigger.

/**
 * Rewrites the content of the next FlowFile record by record: each record read with the
 * configured Record Reader is passed through {@code process(...)} and the result is written
 * with the configured Record Writer.
 * <p>
 * On success the FlowFile receives the writer's attributes plus {@code record.count} and the
 * MIME type, and is transferred to {@code REL_SUCCESS}. Any exception raised while rewriting
 * the content routes the FlowFile to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the reader and writer services
 * @param session the session used to fetch, rewrite and transfer the FlowFile
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    // State filled in by the stream callback and consumed after the write has finished.
    final Map<String, String> resultAttributes = new HashMap<>();
    final AtomicInteger processedRecords = new AtomicInteger();

    // Snapshot of the incoming FlowFile; 'flowFile' itself is reassigned by session.write(...).
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = flowFile.getAttributes();

    try {
        flowFile = session.write(flowFile, (final InputStream in, final OutputStream out) -> {
            try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());

                try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
                    writer.beginRecordSet();

                    for (Record next = reader.nextRecord(); next != null; next = reader.nextRecord()) {
                        writer.write(AbstractRecordProcessor.this.process(next, writeSchema, original, context));
                    }

                    final WriteResult summary = writer.finishRecordSet();
                    resultAttributes.put("record.count", String.valueOf(summary.getRecordCount()));
                    resultAttributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                    resultAttributes.putAll(summary.getAttributes());
                    processedRecords.set(summary.getRecordCount());
                }
            } catch (final SchemaNotFoundException schemaFailure) {
                throw new ProcessException(schemaFailure.getLocalizedMessage(), schemaFailure);
            } catch (final MalformedRecordException parseFailure) {
                throw new ProcessException("Could not parse incoming data", parseFailure);
            }
        });
    } catch (final Exception failure) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, failure });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    flowFile = session.putAllAttributes(flowFile, resultAttributes);
    session.transfer(flowFile, REL_SUCCESS);

    final int total = processedRecords.get();
    session.adjustCounter("Records Processed", total, false);
    getLogger().info("Successfully converted {} records for {}", new Object[] { total, flowFile });
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) StreamCallback(org.apache.nifi.processor.io.StreamCallback) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Record(org.apache.nifi.serialization.record.Record) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 10 with SchemaNotFoundException

use of org.apache.nifi.schema.access.SchemaNotFoundException in project nifi by apache.

the class AbstractRouteRecord method onTrigger.

/**
 * Routes each record of the next FlowFile to zero or more relationships.
 * <p>
 * For every relationship returned by {@code route(...)} a child FlowFile and a Record Writer are
 * created on first use; the record is then written to each of those writers. After all records
 * have been read, every writer is finished and closed, and its child FlowFile is given
 * {@code record.count}, the MIME type and the writer's attributes before being transferred to
 * its relationship.
 * <p>
 * If obtaining the FlowFile context fails, or anything fails while reading or finishing, the
 * original FlowFile is routed to {@code REL_FAILURE}; in the latter case all child FlowFiles
 * are removed. On success the original is either transferred to {@code REL_ORIGINAL} (when
 * {@code isRouteOriginal()} is true) or removed.
 *
 * @param context the process context supplying the reader and writer services
 * @param session the session used to read the input and create the child FlowFiles
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final T flowFileContext;
    try {
        flowFileContext = getFlowFileContext(flowFile, context);
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final AtomicInteger numRecords = new AtomicInteger(0);
    // One (child FlowFile, writer) pair per relationship that has received at least one record.
    final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    Record record;
                    while ((record = reader.nextRecord()) != null) {
                        final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
                        numRecords.incrementAndGet();
                        for (final Relationship relationship : relationships) {
                            final RecordSetWriter recordSetWriter;
                            Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
                            if (tuple == null) {
                                // First record for this relationship: create its child FlowFile and writer.
                                FlowFile outFlowFile = session.create(original);
                                final OutputStream out = session.write(outFlowFile);
                                recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
                                recordSetWriter.beginRecordSet();
                                tuple = new Tuple<>(outFlowFile, recordSetWriter);
                                writers.put(relationship, tuple);
                            } else {
                                recordSetWriter = tuple.getValue();
                            }
                            recordSetWriter.write(record);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
        for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
            final Relationship relationship = entry.getKey();
            final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
            final RecordSetWriter writer = tuple.getValue();
            FlowFile childFlowFile = tuple.getKey();
            final WriteResult writeResult = writer.finishRecordSet();
            // The writer is closed here, before the child FlowFile is updated and transferred.
            try {
                writer.close();
            } catch (final IOException ioe) {
                // Pass the exception along so the cause of the failed close is not lost.
                getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile, ioe });
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.putAll(writeResult.getAttributes());
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.transfer(childFlowFile, relationship);
            session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
            session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);
            session.getProvenanceReporter().route(childFlowFile, relationship);
        }
    } catch (final Exception e) {
        getLogger().error("Failed to process {}", new Object[] { flowFile, e });
        // Close each writer before removing its child FlowFile, then route only the original to failure.
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            try {
                tuple.getValue().close();
            } catch (final Exception e1) {
                // Pass the exception along so the cause of the failed close is not lost.
                getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey(), e1 });
            }
            session.remove(tuple.getKey());
        }
        session.transfer(flowFile, REL_FAILURE);
        return;
    } finally {
        // Safety net: by this point every writer has normally been closed on the success or failure
        // path above, so this relies on close() tolerating a repeated call - TODO confirm for all writers.
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            final RecordSetWriter writer = tuple.getValue();
            try {
                writer.close();
            } catch (final Exception e) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
            }
        }
    }
    if (isRouteOriginal()) {
        flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
        session.transfer(flowFile, REL_ORIGINAL);
    } else {
        session.remove(flowFile);
    }
    getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) WriteResult(org.apache.nifi.serialization.WriteResult) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Relationship(org.apache.nifi.processor.Relationship) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.nifi.util.Tuple)

Aggregations

SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException)26 IOException (java.io.IOException)19 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)19 MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException)13 InputStream (java.io.InputStream)12 RecordReader (org.apache.nifi.serialization.RecordReader)12 RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory)12 FlowFile (org.apache.nifi.flowfile.FlowFile)11 ProcessException (org.apache.nifi.processor.exception.ProcessException)11 RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory)10 Record (org.apache.nifi.serialization.record.Record)9 RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter)8 OutputStream (java.io.OutputStream)7 Map (java.util.Map)7 HashMap (java.util.HashMap)6 WriteResult (org.apache.nifi.serialization.WriteResult)5 JsonNode (com.fasterxml.jackson.databind.JsonNode)4 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)4 BufferedInputStream (java.io.BufferedInputStream)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3