Example 11 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

The class AbstractPutHDFSRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // if any error was captured while reading or writing the records, rethrow it so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }
        return null;
    });
}
Also used: Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), Configuration (org.apache.hadoop.conf.Configuration), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), HDFSRecordWriter (org.apache.nifi.processors.hadoop.record.HDFSRecordWriter), AtomicReference (java.util.concurrent.atomic.AtomicReference), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), RecordReaderFactoryException (org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException), FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException), FailureException (org.apache.nifi.processors.hadoop.exception.FailureException), IOException (java.io.IOException), FileNotFoundException (java.io.FileNotFoundException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), StopWatch (org.apache.nifi.util.StopWatch), FileSystem (org.apache.hadoop.fs.FileSystem), RecordSet (org.apache.nifi.serialization.record.RecordSet), HashMap (java.util.HashMap), Map (java.util.Map), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
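The core trick above is the dot-file pattern: write to a hidden "."-prefixed temp file, then rename it so the data appears atomically under its final name. A minimal standalone sketch of the same pattern against the plain Hadoop FileSystem API (hypothetical paths and filename, simplified error handling; not the processor's own code):

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DotFileWriteSketch {
    public static void main(String[] args) throws IOException {
        final FileSystem fs = FileSystem.get(new Configuration());
        final Path dir = new Path("/data/out");           // hypothetical directory
        final Path temp = new Path(dir, ".records.avro"); // hidden while in flight
        final Path dest = new Path(dir, "records.avro");

        // Write everything to the dot-prefixed temp file first.
        try (OutputStream out = fs.create(temp, true)) {
            out.write("example".getBytes());
        }
        // Only a successful rename makes the data visible under the final name,
        // so readers never observe a partially written destination file.
        if (!fs.rename(temp, dest)) {
            fs.delete(temp, false);
            throw new IOException("Failed to rename " + temp + " to " + dest);
        }
    }
}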

Example 12 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

The class PutElasticsearchHttpRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions(flowFile).getValue();
    OkHttpClient okHttpClient = getClient();
    final ComponentLog logger = getLogger();
    final String baseUrl = trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
    HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder().addPathSegment("_bulk");
    // Find the user-added properties and set them as query parameters on the URL
    for (Map.Entry<PropertyDescriptor, String> property : context.getProperties().entrySet()) {
        PropertyDescriptor pd = property.getKey();
        if (pd.isDynamic()) {
            if (property.getValue() != null) {
                urlBuilder = urlBuilder.addQueryParameter(pd.getName(), context.getProperty(pd).evaluateAttributeExpressions().getValue());
            }
        }
    }
    final URL url = urlBuilder.build().url();
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(index)) {
        logger.error("No value for index in for {}, transferring to failure", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(indexOp)) {
        logger.error("No Index operation specified for {}, transferring to failure.", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    switch(indexOp.toLowerCase()) {
        case "index":
        case "update":
        case "upsert":
        case "delete":
            break;
        default:
            logger.error("Index operation {} not supported for {}, transferring to failure.", new Object[] { indexOp, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
    }
    this.nullSuppression = context.getProperty(SUPPRESS_NULLS).getValue();
    final String id_path = context.getProperty(ID_RECORD_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final RecordPath recordPath = StringUtils.isEmpty(id_path) ? null : recordPathCache.getCompiled(id_path);
    final StringBuilder sb = new StringBuilder();
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final String id;
            if (recordPath != null) {
                Optional<FieldValue> idPathValue = recordPath.evaluate(record).getSelectedFields().findFirst();
                if (!idPathValue.isPresent() || idPathValue.get().getValue() == null) {
                    throw new IdentifierNotFoundException("Identifier Record Path specified but no value was found, transferring {} to failure.");
                }
                id = idPathValue.get().getValue().toString();
            } else {
                id = null;
            }
            // a missing ID is allowed only for the "index" operation, where Elasticsearch auto-generates one
            if (id == null && !indexOp.equalsIgnoreCase("index")) {
                throw new IdentifierNotFoundException("Index operation {} requires a valid identifier value from a flow file attribute, transferring to failure.");
            }
            final StringBuilder json = new StringBuilder();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            JsonGenerator generator = factory.createJsonGenerator(out);
            writeRecord(record, record.getSchema(), generator);
            generator.flush();
            generator.close();
            json.append(out.toString());
            if (indexOp.equalsIgnoreCase("index")) {
                sb.append("{\"index\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\"");
                if (!StringUtils.isEmpty(id)) {
                    sb.append(", \"_id\": \"");
                    sb.append(id);
                    sb.append("\"");
                }
                sb.append("}}\n");
                sb.append(json);
                sb.append("\n");
            } else if (indexOp.equalsIgnoreCase("upsert") || indexOp.equalsIgnoreCase("update")) {
                sb.append("{\"update\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                sb.append("\" }\n");
                sb.append("{\"doc\": ");
                sb.append(json);
                sb.append(", \"doc_as_upsert\": ");
                sb.append(indexOp.equalsIgnoreCase("upsert"));
                sb.append(" }\n");
            } else if (indexOp.equalsIgnoreCase("delete")) {
                sb.append("{\"delete\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                sb.append("\" }\n");
            }
        }
    } catch (IdentifierNotFoundException infe) {
        logger.error(infe.getMessage(), new Object[] { flowFile });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (final IOException | SchemaNotFoundException | MalformedRecordException e) {
        logger.error("Could not parse incoming data", e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), sb.toString());
    final Response getResponse;
    try {
        getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "PUT", requestBody);
    } catch (final Exception e) {
        logger.error("Routing to {} due to exception: {}", new Object[] { REL_FAILURE.getName(), e }, e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final int statusCode = getResponse.code();
    if (isSuccess(statusCode)) {
        ResponseBody responseBody = getResponse.body();
        try {
            final byte[] bodyBytes = responseBody.bytes();
            JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
            boolean errors = responseJson.get("errors").asBoolean(false);
            // ES has no rollback, so if errors occur, log them and route the whole flow file to failure
            if (errors) {
                ArrayNode itemNodeArray = (ArrayNode) responseJson.get("items");
                if (itemNodeArray.size() > 0) {
                    // All items are returned whether they succeeded or failed, so iterate through the item array,
                    // keeping only the first error for logging; since this bulk request came from a single flow file,
                    // the whole flow file is routed to failure if any item failed
                    String errorReason = null;
                    for (int i = itemNodeArray.size() - 1; i >= 0; i--) {
                        JsonNode itemNode = itemNodeArray.get(i);
                        int status = itemNode.findPath("status").asInt();
                        if (!isSuccess(status)) {
                            if (errorReason == null) {
                                // Use "result" if it is present; this happens for status codes like 404 Not Found, which may not have an error/reason
                                String reason = itemNode.findPath("result").asText();
                                if (StringUtils.isEmpty(reason)) {
                                    // If there was no result, we expect an error with a string description in the "reason" field
                                    reason = itemNode.findPath("reason").asText();
                                }
                                errorReason = reason;
                                logger.error("Failed to process {} due to {}, transferring to failure", new Object[] { flowFile, errorReason });
                            }
                        }
                    }
                }
                session.transfer(flowFile, REL_FAILURE);
            } else {
                session.transfer(flowFile, REL_SUCCESS);
                session.getProvenanceReporter().send(flowFile, url.toString());
            }
        } catch (IOException ioe) {
            // Something went wrong when parsing the response, log the error and route to failure
            logger.error("Error parsing Bulk API response: {}", new Object[] { ioe.getMessage() }, ioe);
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
        }
    } else if (statusCode / 100 == 5) {
        // 5xx -> RETRY, but a server error might last a while, so yield
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...", new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } else {
        // 1xx, 3xx, 4xx, etc. -> NO RETRY
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to failure", new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_FAILURE);
    }
    getResponse.close();
}
Also used: OkHttpClient (okhttp3.OkHttpClient), RecordReader (org.apache.nifi.serialization.RecordReader), JsonNode (com.fasterxml.jackson.databind.JsonNode), URL (java.net.URL), JsonGenerator (com.fasterxml.jackson.core.JsonGenerator), Record (org.apache.nifi.serialization.record.Record), FieldValue (org.apache.nifi.record.path.FieldValue), ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode), RequestBody (okhttp3.RequestBody), FlowFile (org.apache.nifi.flowfile.FlowFile), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), ByteArrayInputStream (java.io.ByteArrayInputStream), InputStream (java.io.InputStream), RecordPath (org.apache.nifi.record.path.RecordPath), ByteArrayOutputStream (java.io.ByteArrayOutputStream), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), HttpUrl (okhttp3.HttpUrl), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), ResponseBody (okhttp3.ResponseBody), Response (okhttp3.Response), Map (java.util.Map)
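For reference, each record contributes one action line plus, except for delete, one source line to the _bulk body. A small sketch of what the StringBuilder above accumulates for a single record when indexOp is "index" (the index, type, id, and document values are hypothetical):

public class BulkBodySketch {
    public static void main(String[] args) {
        final String index = "users";               // from the INDEX property
        final String docType = "person";            // from the TYPE property
        final String id = "42";                     // from the Identifier Record Path
        final String json = "{\"name\":\"alice\"}"; // the serialized record

        // "index" op: an action metadata line followed by the document source line.
        final StringBuilder sb = new StringBuilder();
        sb.append("{\"index\": { \"_index\": \"").append(index)
          .append("\", \"_type\": \"").append(docType)
          .append("\", \"_id\": \"").append(id).append("\"}}\n")
          .append(json).append("\n");

        // Prints:
        // {"index": { "_index": "users", "_type": "person", "_id": "42"}}
        // {"name":"alice"}
        System.out.print(sb);
    }
}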

Example 13 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

The class ListenTCPRecord, method onScheduled:

@OnScheduled
public void onScheduled(final ProcessContext context) throws IOException {
    this.port = context.getProperty(PORT).evaluateAttributeExpressions().asInteger();
    final int readTimeout = context.getProperty(READ_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue();
    final int maxSocketBufferSize = context.getProperty(MAX_SOCKET_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final int maxConnections = context.getProperty(MAX_CONNECTIONS).asInteger();
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    // if the Network Interface Property wasn't provided then a null InetAddress will indicate to bind to all interfaces
    final InetAddress nicAddress;
    final String nicAddressStr = context.getProperty(NETWORK_INTF_NAME).evaluateAttributeExpressions().getValue();
    if (!StringUtils.isEmpty(nicAddressStr)) {
        NetworkInterface netIF = NetworkInterface.getByName(nicAddressStr);
        nicAddress = netIF.getInetAddresses().nextElement();
    } else {
        nicAddress = null;
    }
    SSLContext sslContext = null;
    SslContextFactory.ClientAuth clientAuth = null;
    final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
    if (sslContextService != null) {
        final String clientAuthValue = context.getProperty(CLIENT_AUTH).getValue();
        sslContext = sslContextService.createSSLContext(SSLContextService.ClientAuth.valueOf(clientAuthValue));
        clientAuth = SslContextFactory.ClientAuth.valueOf(clientAuthValue);
    }
    // create a ServerSocketChannel in non-blocking mode and bind to the given address and port
    final ServerSocketChannel serverSocketChannel = ServerSocketChannel.open();
    serverSocketChannel.configureBlocking(false);
    serverSocketChannel.bind(new InetSocketAddress(nicAddress, port));
    this.dispatcher = new SocketChannelRecordReaderDispatcher(serverSocketChannel, sslContext, clientAuth, readTimeout, maxSocketBufferSize, maxConnections, recordReaderFactory, socketReaders, getLogger());
    // start a thread to run the dispatcher
    final Thread readerThread = new Thread(dispatcher);
    readerThread.setName(getClass().getName() + " [" + getIdentifier() + "]");
    readerThread.setDaemon(true);
    readerThread.start();
}
Also used: InetSocketAddress (java.net.InetSocketAddress), SocketChannelRecordReaderDispatcher (org.apache.nifi.record.listen.SocketChannelRecordReaderDispatcher), NetworkInterface (java.net.NetworkInterface), SSLContext (javax.net.ssl.SSLContext), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), SslContextFactory (org.apache.nifi.security.util.SslContextFactory), SSLContextService (org.apache.nifi.ssl.SSLContextService), RestrictedSSLContextService (org.apache.nifi.ssl.RestrictedSSLContextService), InetAddress (java.net.InetAddress), ServerSocketChannel (java.nio.channels.ServerSocketChannel), OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled)
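The dispatcher is handed a non-blocking ServerSocketChannel, which one background thread polls for new connections. A minimal JDK-only sketch of that accept loop (hypothetical port; the real SocketChannelRecordReaderDispatcher additionally applies SSL, enforces the connection limit, and wraps each channel in a record reader):

import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;

public class AcceptLoopSketch {
    public static void main(String[] args) throws Exception {
        final ServerSocketChannel server = ServerSocketChannel.open();
        server.configureBlocking(false);          // accept() returns null instead of blocking
        server.bind(new InetSocketAddress(9999)); // hypothetical port

        final Thread readerThread = new Thread(() -> {
            while (!Thread.currentThread().isInterrupted()) {
                try {
                    final SocketChannel channel = server.accept();
                    if (channel == null) {        // no pending connection; back off briefly
                        Thread.sleep(20);
                        continue;
                    }
                    // The real dispatcher hands the channel to a record reader here.
                    System.out.println("Accepted " + channel.getRemoteAddress());
                } catch (IOException | InterruptedException e) {
                    return;
                }
            }
        });
        readerThread.setDaemon(true);
        readerThread.start();
        readerThread.join(5_000);                 // keep the demo alive briefly
    }
}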

Example 14 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

The class ListenUDPRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
    final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    for (int i = 0; i < maxBatchSize; i++) {
        // this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
        // if the error handling is ever changed to use the error queue then this flag needs to be changed as well
        final StandardEvent event = getMessage(true, false, session);
        // break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
        if (event == null) {
            break;
        }
        // attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
        // entire datagram can be read as records, and if not transfer the whole thing to parse.failure
        final RecordReader reader;
        final List<Record> records = new ArrayList<>();
        try (final InputStream in = new ByteArrayInputStream(event.getData())) {
            reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());
            Record record;
            while ((record = reader.nextRecord()) != null) {
                records.add(record);
            }
        } catch (final Exception e) {
            handleParseFailure(event, session, e);
            continue;
        }
        if (records.size() == 0) {
            handleParseFailure(event, session, null);
            continue;
        }
        // see if we already started a flow file and writer for the given sender
        // if an exception happens creating the flow file or writer, put the event in the error queue to try it again later
        FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());
        if (flowFileRecordWriter == null) {
            FlowFile flowFile = null;
            OutputStream rawOut = null;
            RecordSetWriter writer = null;
            try {
                flowFile = session.create();
                rawOut = session.write(flowFile);
                final Record firstRecord = records.get(0);
                final RecordSchema recordSchema = firstRecord.getSchema();
                final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);
                writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
                writer.beginRecordSet();
                flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
                flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
            } catch (final Exception ex) {
                getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (final Exception e) {
                    getLogger().warn("Failed to close Record Writer", e);
                }
                if (rawOut != null) {
                    IOUtils.closeQuietly(rawOut);
                }
                if (flowFile != null) {
                    session.remove(flowFile);
                }
                context.yield();
                break;
            }
        }
        // attempt to write each record; if any record fails, remove the flow file and break out of the loop
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        try {
            for (final Record record : records) {
                writer.write(record);
            }
        } catch (Exception e) {
            getLogger().error("Failed to write records due to: " + e.getMessage(), e);
            IOUtils.closeQuietly(writer);
            session.remove(flowFileRecordWriter.getFlowFile());
            flowFileRecordWriters.remove(event.getSender());
            break;
        }
    }
    for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
        final String sender = entry.getKey();
        final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        FlowFile flowFile = flowFileRecordWriter.getFlowFile();
        try {
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                writer.close();
            }
            if (writeResult.getRecordCount() == 0) {
                session.remove(flowFile);
                continue;
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(getAttributes(sender));
            attributes.putAll(writeResult.getAttributes());
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);
            final String transitUri = getTransitUri(sender);
            session.getProvenanceReporter().receive(flowFile, transitUri);
        } catch (final Exception e) {
            getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
            session.remove(flowFile);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), HashMap (java.util.HashMap), ByteArrayInputStream (java.io.ByteArrayInputStream), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), ProcessException (org.apache.nifi.processor.exception.ProcessException), IOException (java.io.IOException), UnknownHostException (java.net.UnknownHostException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), StandardEvent (org.apache.nifi.processor.util.listen.event.StandardEvent), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), Record (org.apache.nifi.serialization.record.Record), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), Map (java.util.Map)
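Both the per-sender setup and the completion loop above follow the RecordSetWriter lifecycle: beginRecordSet() before the first record, write() per record, finishRecordSet() to obtain the WriteResult, then close(). A condensed sketch of that contract, assuming the writer instance comes from an already-configured RecordSetWriterFactory:

import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;

public class RecordSetLifecycleSketch {
    // 'writer' is assumed to be created elsewhere via RecordSetWriterFactory.createWriter(...).
    static WriteResult writeAll(final RecordSetWriter writer, final Iterable<Record> records) throws Exception {
        writer.beginRecordSet();                    // open the enclosing record set
        for (final Record record : records) {
            writer.write(record);                   // one record at a time
        }
        final WriteResult result = writer.finishRecordSet();  // flush and summarize
        writer.close();
        // result.getRecordCount() and result.getAttributes() feed the flow file's
        // record.count and writer-specific attributes, exactly as in the loop above.
        return result;
    }
}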

Example 15 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

The class PartitionRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    try (final InputStream in = session.read(flowFile)) {
        final Map<String, String> originalAttributes = flowFile.getAttributes();
        final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
            // Evaluate all of the RecordPath's for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();
                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }
            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);
                final OutputStream out = session.write(childFlowFile);
                writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                writer.beginRecordSet();
                writerMap.put(recordValueMap, writer);
            }
            writer.write(record);
        }
        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }
            session.remove(valueMap.getFlowFile());
        }
        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Transfer the child flow files only now that the entire incoming FlowFile has been processed,
    // because we want to ensure that we are able to remove the child flowfiles in case of a failure.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }
    session.transfer(flowFile, REL_ORIGINAL);
}
Also used: Arrays (java.util.Arrays), CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription), ValidationContext (org.apache.nifi.components.ValidationContext), HashMap (java.util.HashMap), EventDriven (org.apache.nifi.annotation.behavior.EventDriven), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), ProcessException (org.apache.nifi.processor.exception.ProcessException), RecordPath (org.apache.nifi.record.path.RecordPath), ArrayList (java.util.ArrayList), RecordPathValidator (org.apache.nifi.record.path.validation.RecordPathValidator), HashSet (java.util.HashSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), WritesAttributes (org.apache.nifi.annotation.behavior.WritesAttributes), Relationship (org.apache.nifi.processor.Relationship), RecordReader (org.apache.nifi.serialization.RecordReader), Map (java.util.Map), Requirement (org.apache.nifi.annotation.behavior.InputRequirement.Requirement), ValidationResult (org.apache.nifi.components.ValidationResult), Record (org.apache.nifi.serialization.record.Record), OutputStream (java.io.OutputStream), FlowFile (org.apache.nifi.flowfile.FlowFile), Collection (java.util.Collection), WriteResult (org.apache.nifi.serialization.WriteResult), DataTypeUtils (org.apache.nifi.serialization.record.util.DataTypeUtils), ProcessContext (org.apache.nifi.processor.ProcessContext), Set (java.util.Set), IOException (java.io.IOException), ProcessSession (org.apache.nifi.processor.ProcessSession), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), WritesAttribute (org.apache.nifi.annotation.behavior.WritesAttribute), SeeAlso (org.apache.nifi.annotation.documentation.SeeAlso), Collectors (java.util.stream.Collectors), List (java.util.List), InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement), Stream (java.util.stream.Stream), DynamicProperty (org.apache.nifi.annotation.behavior.DynamicProperty), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), SupportsBatching (org.apache.nifi.annotation.behavior.SupportsBatching), RecordPathCache (org.apache.nifi.record.path.util.RecordPathCache), AbstractProcessor (org.apache.nifi.processor.AbstractProcessor), Tags (org.apache.nifi.annotation.documentation.Tags), CoreAttributes (org.apache.nifi.flowfile.attributes.CoreAttributes), FieldValue (org.apache.nifi.record.path.FieldValue), Collections (java.util.Collections), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), InputStream (java.io.InputStream)
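The grouping works because RecordValueMap's equality is based on the evaluated RecordPath values, so records with equal value tuples resolve to the same RecordSetWriter. A minimal sketch of the same idea using a value-equal List as the HashMap key (hypothetical data, plain collections instead of NiFi types):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionKeySketch {
    public static void main(String[] args) {
        // Each record reduced to the tuple of its evaluated partition values.
        final List<List<Object>> partitionValues = List.of(
                List.of("CA", "LA"), List.of("CA", "SF"), List.of("CA", "LA"));

        // Like RecordValueMap above: List equality is value-based, so records whose
        // RecordPaths evaluate to the same values share one bucket (one writer).
        final Map<List<Object>, Integer> recordsPerBucket = new HashMap<>();
        for (final List<Object> values : partitionValues) {
            recordsPerBucket.merge(values, 1, Integer::sum);
        }
        System.out.println(recordsPerBucket);  // e.g. {[CA, SF]=1, [CA, LA]=2}
    }
}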

Aggregations

Types used across the full set of examples, with occurrence counts: RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) ×30, IOException (java.io.IOException) ×22, InputStream (java.io.InputStream) ×22, FlowFile (org.apache.nifi.flowfile.FlowFile) ×21, RecordReader (org.apache.nifi.serialization.RecordReader) ×21, HashMap (java.util.HashMap) ×17, RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory) ×16, ProcessException (org.apache.nifi.processor.exception.ProcessException) ×15, SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException) ×14, MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException) ×14, RecordSchema (org.apache.nifi.serialization.record.RecordSchema) ×13, Record (org.apache.nifi.serialization.record.Record) ×12, Map (java.util.Map) ×10, RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter) ×9, OutputStream (java.io.OutputStream) ×8, ArrayList (java.util.ArrayList) ×8, ComponentLog (org.apache.nifi.logging.ComponentLog) ×8, WriteResult (org.apache.nifi.serialization.WriteResult) ×7, RecordSet (org.apache.nifi.serialization.record.RecordSet) ×7, MockFlowFile (org.apache.nifi.util.MockFlowFile) ×6