
Example 11 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

The class GetSolr, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final AtomicBoolean continuePaging = new AtomicBoolean(true);
    final SolrQuery solrQuery = new SolrQuery();
    try {
        if (id_field == null) {
            id_field = getFieldNameOfUniqueKey();
        }
        final String dateField = context.getProperty(DATE_FIELD).getValue();
        final Map<String, String> stateMap = new HashMap<String, String>();
        stateMap.putAll(context.getStateManager().getState(Scope.CLUSTER).toMap());
        solrQuery.setQuery("*:*");
        final String query = context.getProperty(SOLR_QUERY).getValue();
        if (!StringUtils.isBlank(query) && !query.equals("*:*")) {
            solrQuery.addFilterQuery(query);
        }
        final StringBuilder automatedFilterQuery = (new StringBuilder()).append(dateField).append(":[").append(stateMap.get(STATE_MANAGER_FILTER)).append(" TO *]");
        solrQuery.addFilterQuery(automatedFilterQuery.toString());
        final List<String> fieldList = new ArrayList<String>();
        final String returnFields = context.getProperty(RETURN_FIELDS).getValue();
        if (!StringUtils.isBlank(returnFields)) {
            fieldList.addAll(Arrays.asList(returnFields.trim().split("[,]")));
            if (!fieldList.contains(dateField)) {
                fieldList.add(dateField);
                dateFieldNotInSpecifiedFieldsList.set(true);
            }
            for (String returnField : fieldList) {
                solrQuery.addField(returnField.trim());
            }
        }
        solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, stateMap.get(STATE_MANAGER_CURSOR_MARK));
        solrQuery.setRows(context.getProperty(BATCH_SIZE).asInteger());
        final StringBuilder sortClause = (new StringBuilder()).append(dateField).append(" asc,").append(id_field).append(" asc");
        solrQuery.setParam("sort", sortClause.toString());
        while (continuePaging.get()) {
            final QueryRequest req = new QueryRequest(solrQuery);
            if (isBasicAuthEnabled()) {
                req.setBasicAuthCredentials(getUsername(), getPassword());
            }
            logger.debug(solrQuery.toQueryString());
            final QueryResponse response = req.process(getSolrClient());
            final SolrDocumentList documentList = response.getResults();
            if (response.getResults().size() > 0) {
                final SolrDocument lastSolrDocument = documentList.get(response.getResults().size() - 1);
                final String latestDateValue = df.format(lastSolrDocument.get(dateField));
                final String newCursorMark = response.getNextCursorMark();
                solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, newCursorMark);
                stateMap.put(STATE_MANAGER_CURSOR_MARK, newCursorMark);
                stateMap.put(STATE_MANAGER_FILTER, latestDateValue);
                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, "solrQuery", solrQuery.toString());
                if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())) {
                    if (dateFieldNotInSpecifiedFieldsList.get()) {
                        for (SolrDocument doc : response.getResults()) {
                            doc.removeFields(dateField);
                        }
                    }
                    flowFile = session.write(flowFile, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response));
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/xml");
                } else {
                    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
                    final RecordSchema schema = writerFactory.getSchema(null, null);
                    final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema);
                    final StringBuffer mimeType = new StringBuffer();
                    flowFile = session.write(flowFile, new OutputStreamCallback() {

                        @Override
                        public void process(final OutputStream out) throws IOException {
                            try {
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out);
                                writer.write(recordSet);
                                writer.flush();
                                mimeType.append(writer.getMimeType());
                            } catch (SchemaNotFoundException e) {
                                throw new ProcessException("Could not parse Solr response", e);
                            }
                        }
                    });
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType.toString());
                }
                session.transfer(flowFile, REL_SUCCESS);
            }
            continuePaging.set(response.getResults().size() == Integer.parseInt(context.getProperty(BATCH_SIZE).getValue()));
        }
        context.getStateManager().setState(stateMap, Scope.CLUSTER);
    } catch (SolrServerException | SchemaNotFoundException | IOException e) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), e }, e);
        throw new ProcessException(e);
    } catch (final Throwable t) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), t }, t);
        throw t;
    }
}
Also used : HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSet(org.apache.nifi.serialization.record.RecordSet) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) SolrDocumentList(org.apache.solr.common.SolrDocumentList) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ProcessException(org.apache.nifi.processor.exception.ProcessException) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException)
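
The record branch above uses the one-shot write path of the RecordSetWriter API: create a writer over the FlowFile's output stream, hand it the whole RecordSet, and capture the writer's MIME type for the mime.type attribute. A minimal sketch of that path, assuming the factory, schema, and record set are already available; the class and method names here are illustrative, not part of GetSolr.

import java.io.IOException;
import java.io.OutputStream;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;

// Illustrative helper, not part of GetSolr: write an entire RecordSet in one call
// and report the writer's MIME type so it can be copied onto the FlowFile.
public class RecordSetWriteSketch {

    public static String writeRecords(final RecordSetWriterFactory factory, final RecordSchema schema,
            final RecordSet recordSet, final OutputStream out, final ComponentLog logger)
            throws IOException, SchemaNotFoundException {
        // createWriter(logger, schema, out) matches the factory call used in the example above
        try (final RecordSetWriter writer = factory.createWriter(logger, schema, out)) {
            // write(RecordSet) serializes every record in the set
            writer.write(recordSet);
            writer.flush();
            // the MIME type depends on the configured writer service (JSON, Avro, CSV, ...)
            return writer.getMimeType();
        }
    }
}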

Example 12 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

The class AbstractFetchHDFSRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
            final StopWatch stopWatch = new StopWatch(true);
            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);
            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                    final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {
                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());
                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }
                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }
                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });
            stopWatch.stop();
            // if any error was captured inside the write callback, rethrow it now so it lands
            // in one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            FlowFile successFlowFile = postProcess(context, session, child, path);
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);
            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;
        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, e });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }
        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        if (child != null) {
            session.remove(child);
        }
        return null;
    });
}
Also used : Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) Configuration(org.apache.hadoop.conf.Configuration) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) ProcessException(org.apache.nifi.processor.exception.ProcessException) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(org.apache.hadoop.security.AccessControlException) StopWatch(org.apache.nifi.util.StopWatch) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) FileSystem(org.apache.hadoop.fs.FileSystem) Record(org.apache.nifi.serialization.record.Record) BufferedOutputStream(java.io.BufferedOutputStream) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashMap(java.util.HashMap) Map(java.util.Map) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) HDFSRecordReader(org.apache.nifi.processors.hadoop.record.HDFSRecordReader)
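
The session.write callback in this example cannot let checked exceptions escape, so the processor parks any failure in an AtomicReference and rethrows it once the write completes, which routes the flow into the catch blocks after the callback. A minimal, framework-free sketch of that exception-holder idiom; the class and method names are illustrative.

import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

public class ExceptionHolderSketch {

    // Run a callback that may only record its failure, then surface whatever it captured.
    public static void runAndRethrow(final Consumer<AtomicReference<Throwable>> callback) throws Throwable {
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
        callback.accept(exceptionHolder);
        // rethrow after the callback returns so the caller's catch blocks can classify the failure
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
    }
}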

Example 13 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

The class ListenTCPRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final SocketChannelRecordReader socketRecordReader = pollForSocketRecordReader();
    if (socketRecordReader == null) {
        return;
    }
    if (socketRecordReader.isClosed()) {
        getLogger().warn("Unable to read records from {}, socket already closed", new Object[] { getRemoteAddress(socketRecordReader) });
        // still need to call close so the overall count is decremented
        IOUtils.closeQuietly(socketRecordReader);
        return;
    }
    final int recordBatchSize = context.getProperty(RECORD_BATCH_SIZE).asInteger();
    final String readerErrorHandling = context.getProperty(READER_ERROR_HANDLING_STRATEGY).getValue();
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    // synchronize to ensure there are no stale values in the underlying SocketChannel
    synchronized (socketRecordReader) {
        FlowFile flowFile = session.create();
        try {
            // lazily creating the record reader here b/c we need a flow file, eventually shouldn't have to do this
            RecordReader recordReader = socketRecordReader.getRecordReader();
            if (recordReader == null) {
                recordReader = socketRecordReader.createRecordReader(flowFile, getLogger());
            }
            Record record;
            try {
                record = recordReader.nextRecord();
            } catch (final Exception e) {
                boolean timeout = false;
                // some of the underlying record libraries wrap the real exception in RuntimeException, so check each
                // throwable (starting with the current one) to see if its a SocketTimeoutException
                Throwable cause = e;
                while (cause != null) {
                    if (cause instanceof SocketTimeoutException) {
                        timeout = true;
                        break;
                    }
                    cause = cause.getCause();
                }
                if (timeout) {
                    getLogger().debug("Timeout reading records, will try again later", e);
                    socketReaders.offer(socketRecordReader);
                    session.remove(flowFile);
                    return;
                } else {
                    throw e;
                }
            }
            if (record == null) {
                getLogger().debug("No records available from {}, closing connection", new Object[] { getRemoteAddress(socketRecordReader) });
                IOUtils.closeQuietly(socketRecordReader);
                session.remove(flowFile);
                return;
            }
            String mimeType = null;
            WriteResult writeResult = null;
            final RecordSchema recordSchema = recordSetWriterFactory.getSchema(Collections.EMPTY_MAP, record.getSchema());
            try (final OutputStream out = session.write(flowFile);
                final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out)) {
                // start the record set and write the first record from above
                recordWriter.beginRecordSet();
                writeResult = recordWriter.write(record);
                while (record != null && writeResult.getRecordCount() < recordBatchSize) {
                    // read the next record; on a failure, either rethrow (discard strategy) or null out the
                    // record to break out of the loop, which will transfer what we have and close the connection
                    try {
                        record = recordReader.nextRecord();
                    } catch (final SocketTimeoutException ste) {
                        getLogger().debug("Timeout reading records, will try again later", ste);
                        break;
                    } catch (final Exception e) {
                        if (ERROR_HANDLING_DISCARD.getValue().equals(readerErrorHandling)) {
                            throw e;
                        } else {
                            record = null;
                        }
                    }
                    if (record != null) {
                        writeResult = recordWriter.write(record);
                    }
                }
                writeResult = recordWriter.finishRecordSet();
                recordWriter.flush();
                mimeType = recordWriter.getMimeType();
            }
            // if we didn't write any records then we need to remove the flow file
            if (writeResult.getRecordCount() <= 0) {
                getLogger().debug("Removing flow file, no records were written");
                session.remove(flowFile);
            } else {
                final String sender = getRemoteAddress(socketRecordReader);
                final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
                attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
                attributes.put("tcp.sender", sender);
                attributes.put("tcp.port", String.valueOf(port));
                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                flowFile = session.putAllAttributes(flowFile, attributes);
                final String senderHost = sender.startsWith("/") && sender.length() > 1 ? sender.substring(1) : sender;
                final String transitUri = new StringBuilder().append("tcp").append("://").append(senderHost).append(":").append(port).toString();
                session.getProvenanceReporter().receive(flowFile, transitUri);
                session.transfer(flowFile, REL_SUCCESS);
            }
            getLogger().debug("Re-queuing connection for further processing...");
            socketReaders.offer(socketRecordReader);
        } catch (Exception e) {
            getLogger().error("Error processing records: " + e.getMessage(), e);
            IOUtils.closeQuietly(socketRecordReader);
            session.remove(flowFile);
            return;
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.serialization.RecordReader) SocketChannelRecordReader(org.apache.nifi.record.listen.SocketChannelRecordReader) OutputStream(java.io.OutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) ProcessException(org.apache.nifi.processor.exception.ProcessException) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) SocketTimeoutException(java.net.SocketTimeoutException) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Record(org.apache.nifi.serialization.record.Record) SocketChannelRecordReader(org.apache.nifi.record.listen.SocketChannelRecordReader) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)
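
Because some record readers wrap the real failure in a RuntimeException, the example walks the cause chain to decide whether the read merely timed out. A small generic version of that check, with illustrative names; it is not part of ListenTCPRecord.

import java.net.SocketTimeoutException;

public class CauseChainSketch {

    // Returns true if the throwable, or anything in its cause chain, is of the given type.
    public static boolean isCausedBy(final Throwable t, final Class<? extends Throwable> type) {
        Throwable cause = t;
        while (cause != null) {
            if (type.isInstance(cause)) {
                return true;
            }
            cause = cause.getCause();
        }
        return false;
    }

    // Usage mirroring the example: a wrapped SocketTimeoutException means "try again later".
    public static boolean isTimeout(final Exception e) {
        return isCausedBy(e, SocketTimeoutException.class);
    }
}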

Example 14 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

The class ListenUDPRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
    final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    for (int i = 0; i < maxBatchSize; i++) {
        // this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
        // if the error handling is ever changed to use the error queue then this flag needs to be changed as well
        final StandardEvent event = getMessage(true, false, session);
        // break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
        if (event == null) {
            break;
        }
        // attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
        // entire datagram can be read as records, and if not transfer the whole thing to parse.failure
        final RecordReader reader;
        final List<Record> records = new ArrayList<>();
        try (final InputStream in = new ByteArrayInputStream(event.getData())) {
            reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());
            Record record;
            while ((record = reader.nextRecord()) != null) {
                records.add(record);
            }
        } catch (final Exception e) {
            handleParseFailure(event, session, e);
            continue;
        }
        if (records.size() == 0) {
            handleParseFailure(event, session, null);
            continue;
        }
        // see if we already started a flow file and writer for the given sender
        // if an exception happens creating the flow file or writer, put the event in the error queue to try it again later
        FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());
        if (flowFileRecordWriter == null) {
            FlowFile flowFile = null;
            OutputStream rawOut = null;
            RecordSetWriter writer = null;
            try {
                flowFile = session.create();
                rawOut = session.write(flowFile);
                final Record firstRecord = records.get(0);
                final RecordSchema recordSchema = firstRecord.getSchema();
                final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);
                writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
                writer.beginRecordSet();
                flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
                flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
            } catch (final Exception ex) {
                getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (final Exception e) {
                    getLogger().warn("Failed to close Record Writer", e);
                }
                if (rawOut != null) {
                    IOUtils.closeQuietly(rawOut);
                }
                if (flowFile != null) {
                    session.remove(flowFile);
                }
                context.yield();
                break;
            }
        }
        // attempt to write each record, if any record fails then remove the flow file and break out of the loop
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        try {
            for (final Record record : records) {
                writer.write(record);
            }
        } catch (Exception e) {
            getLogger().error("Failed to write records due to: " + e.getMessage(), e);
            IOUtils.closeQuietly(writer);
            session.remove(flowFileRecordWriter.getFlowFile());
            flowFileRecordWriters.remove(event.getSender());
            break;
        }
    }
    for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
        final String sender = entry.getKey();
        final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        FlowFile flowFile = flowFileRecordWriter.getFlowFile();
        try {
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                writer.close();
            }
            if (writeResult.getRecordCount() == 0) {
                session.remove(flowFile);
                continue;
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(getAttributes(sender));
            attributes.putAll(writeResult.getAttributes());
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);
            final String transitUri = getTransitUri(sender);
            session.getProvenanceReporter().receive(flowFile, transitUri);
        } catch (final Exception e) {
            getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
            session.remove(flowFile);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) StandardEvent(org.apache.nifi.processor.util.listen.event.StandardEvent) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Map(java.util.Map) HashMap(java.util.HashMap)
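
FlowFileRecordWriter is a small holder defined inside ListenUDPRecord that pairs the FlowFile being built for one sender with the writer that appends records to it; its definition is not included above. A minimal sketch of what such a holder has to expose for the code above to work; the actual inner class may differ in detail.

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.serialization.RecordSetWriter;

// Illustrative holder: one in-progress FlowFile plus its open RecordSetWriter, keyed by sender.
public class FlowFileRecordWriterSketch {

    private final FlowFile flowFile;
    private final RecordSetWriter recordWriter;

    public FlowFileRecordWriterSketch(final FlowFile flowFile, final RecordSetWriter recordWriter) {
        this.flowFile = flowFile;
        this.recordWriter = recordWriter;
    }

    public FlowFile getFlowFile() {
        return flowFile;
    }

    public RecordSetWriter getRecordWriter() {
        return recordWriter;
    }
}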

Example 15 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

The class PartitionRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    try (final InputStream in = session.read(flowFile)) {
        final Map<String, String> originalAttributes = flowFile.getAttributes();
        final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
            // Evaluate all of the RecordPath's for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();
                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }
            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);
                final OutputStream out = session.write(childFlowFile);
                writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                writer.beginRecordSet();
                writerMap.put(recordValueMap, writer);
            }
            writer.write(record);
        }
        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }
            session.remove(valueMap.getFlowFile());
        }
        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Transfer the child FlowFiles only after every writer has completed successfully,
    // because we want to ensure that we are able to remove the child flowfiles in case of a failure.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }
    session.transfer(flowFile, REL_ORIGINAL);
}
Also used : Arrays(java.util.Arrays) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) ValidationContext(org.apache.nifi.components.ValidationContext) HashMap(java.util.HashMap) EventDriven(org.apache.nifi.annotation.behavior.EventDriven) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordPath(org.apache.nifi.record.path.RecordPath) ArrayList(java.util.ArrayList) RecordPathValidator(org.apache.nifi.record.path.validation.RecordPathValidator) HashSet(java.util.HashSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Relationship(org.apache.nifi.processor.Relationship) RecordReader(org.apache.nifi.serialization.RecordReader) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ValidationResult(org.apache.nifi.components.ValidationResult) Record(org.apache.nifi.serialization.record.Record) OutputStream(java.io.OutputStream) FlowFile(org.apache.nifi.flowfile.FlowFile) Collection(java.util.Collection) WriteResult(org.apache.nifi.serialization.WriteResult) DataTypeUtils(org.apache.nifi.serialization.record.util.DataTypeUtils) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) IOException(java.io.IOException) ProcessSession(org.apache.nifi.processor.ProcessSession) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) Collectors(java.util.stream.Collectors) List(java.util.List) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stream(java.util.stream.Stream) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) SupportsBatching(org.apache.nifi.annotation.behavior.SupportsBatching) RecordPathCache(org.apache.nifi.record.path.util.RecordPathCache) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) Tags(org.apache.nifi.annotation.documentation.Tags) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) FieldValue(org.apache.nifi.record.path.FieldValue) Collections(java.util.Collections) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) InputStream(java.io.InputStream) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Record(org.apache.nifi.serialization.record.Record) ArrayList(java.util.ArrayList) List(java.util.List) FieldValue(org.apache.nifi.record.path.FieldValue) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) RecordPath(org.apache.nifi.record.path.RecordPath) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) WriteResult(org.apache.nifi.serialization.WriteResult) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 21 usages
Record (org.apache.nifi.serialization.record.Record): 17 usages
IOException (java.io.IOException): 16 usages
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 16 usages
OutputStream (java.io.OutputStream): 14 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 14 usages
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14 usages
HashMap (java.util.HashMap): 13 usages
InputStream (java.io.InputStream): 11 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 11 usages
RecordReader (org.apache.nifi.serialization.RecordReader): 11 usages
WriteResult (org.apache.nifi.serialization.WriteResult): 10 usages
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 8 usages
Map (java.util.Map): 7 usages
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 7 usages
ArrayList (java.util.ArrayList): 6 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 5 usages
HashSet (java.util.HashSet): 4 usages
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 4 usages
RecordSet (org.apache.nifi.serialization.record.RecordSet): 4 usages
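
Across these examples the RecordSetWriter lifecycle is the same: resolve a write schema from the factory, create a writer over the FlowFile's output stream, frame the records with beginRecordSet and finishRecordSet, then copy the WriteResult attributes plus record.count and mime.type onto the FlowFile. A condensed sketch of that lifecycle with illustrative names; the FlowFile and session bookkeeping is left to the caller.

import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;

public class RecordSetLifecycleSketch {

    // Write records one at a time and return the attributes the caller should put on the FlowFile.
    public static Map<String, String> writeAll(final RecordSetWriterFactory writerFactory,
            final Map<String, String> flowFileAttributes, final RecordSchema readSchema,
            final Iterator<Record> records, final OutputStream out, final ComponentLog logger)
            throws IOException, SchemaNotFoundException {
        // let the factory reconcile the incoming schema with its configured schema access strategy
        final RecordSchema writeSchema = writerFactory.getSchema(flowFileAttributes, readSchema);
        try (final RecordSetWriter writer = writerFactory.createWriter(logger, writeSchema, out)) {
            writer.beginRecordSet();
            while (records.hasNext()) {
                writer.write(records.next());
            }
            final WriteResult writeResult = writer.finishRecordSet();
            final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            return attributes;
        }
    }
}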