
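For context, InputStreamCallback is the single-method callback that ProcessSession.read(FlowFile, InputStreamCallback) invokes with a FlowFile's content stream. A sketch of the interface shape (see the NiFi source for the authoritative definition):

import java.io.IOException;
import java.io.InputStream;

// Single abstract method, so on Java 8+ the anonymous classes in the
// examples below can usually be written as lambdas:
//     session.read(flowFile, in -> ...);
public interface InputStreamCallback {
    void process(InputStream in) throws IOException;
}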
Example 1 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project kylo by Teradata.

From the class MergeHiveTableMetadata, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        final String databaseNameField = context.getProperty(DATABASE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String databaseOwnerField = context.getProperty(DATABASE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
        final String tableCreateTimeField = context.getProperty(TABLE_CREATE_TIME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableNameField = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableTypeField = context.getProperty(TABLE_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnNameField = context.getProperty(COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String columnTypeField = context.getProperty(COLUMN_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnCommentField = context.getProperty(COLUMN_COMMENT).evaluateAttributeExpressions(flowFile).getValue();
        final StringBuilder sb = new StringBuilder();
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                sb.append(IOUtils.toString(in, Charset.defaultCharset()));
            }
        });
        logger.debug("The json that was received is: " + sb.toString());
        flowFile = session.write(flowFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    JSONArray array = new JSONArray(sb.toString());
                    Map<String, Metadata> tables = new HashMap<>();
                    for (int i = 0; i < array.length(); i++) {
                        JSONObject jsonObj = array.getJSONObject(i);
                        String databaseName = jsonObj.getString(databaseNameField);
                        String databaseOwner = jsonObj.getString(databaseOwnerField);
                        String tableName = jsonObj.getString(tableNameField);
                        String tableCreateTime = jsonObj.getString(tableCreateTimeField);
                        String tableType = jsonObj.getString(tableTypeField);
                        String columnName = jsonObj.getString(columnNameField);
                        String columnType = jsonObj.getString(columnTypeField);
                        String columnComment = jsonObj.getString(columnCommentField);
                        String key = databaseName + tableName;
                        if (tables.containsKey(key)) {
                            Metadata meta = tables.get(key);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                        } else {
                            Metadata meta = new Metadata();
                            meta.setDatabaseName(databaseName);
                            meta.setDatabaseOwner(databaseOwner);
                            meta.setTableCreateTime(tableCreateTime);
                            meta.setTableName(tableName);
                            meta.setTableType(tableType);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                            tables.put(key, meta);
                        }
                    }
                    List<Metadata> tablesAsList = new ArrayList<>();
                    Iterator<Map.Entry<String, Metadata>> iter = tables.entrySet().iterator();
                    while (iter.hasNext()) {
                        tablesAsList.add(iter.next().getValue());
                    }
                    Gson gson = new Gson();
                    JsonElement element = gson.toJsonTree(tablesAsList, new TypeToken<List<Metadata>>() {
                    }.getType());
                    JsonArray jsonArray = element.getAsJsonArray();
                    out.write(jsonArray.toString().getBytes());
                } catch (final Exception e) {
                    throw new ProcessException(e);
                }
            }
        });
        logger.info("*** Completed with status ");
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        logger.error("Unable to execute merge hive json job", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: com.google.gson.Gson, com.google.gson.JsonArray, com.google.gson.JsonElement, com.google.gson.reflect.TypeToken, java.io.IOException, java.io.InputStream, java.io.OutputStream, java.util.ArrayList, java.util.HashMap, java.util.Iterator, java.util.Map, org.apache.nifi.flowfile.FlowFile, org.apache.nifi.logging.ComponentLog, org.apache.nifi.processor.exception.ProcessException, org.apache.nifi.processor.io.InputStreamCallback, org.apache.nifi.processor.io.OutputStreamCallback, org.codehaus.jettison.json.JSONArray, org.codehaus.jettison.json.JSONObject
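On Java 8+ both callbacks above can be collapsed to lambdas, since InputStreamCallback and OutputStreamCallback each declare a single method. A minimal sketch of the same read-then-write shape; mergeTables is a hypothetical stand-in for the JSON grouping logic in the example:

import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

// Hedged sketch: same pattern, lambda form. Exception handling elided.
private FlowFile mergeContent(final ProcessSession session, FlowFile flowFile) {
    final StringBuilder sb = new StringBuilder();
    session.read(flowFile, in -> sb.append(IOUtils.toString(in, StandardCharsets.UTF_8)));
    final String merged = mergeTables(sb.toString()); // hypothetical helper
    return session.write(flowFile, out -> out.write(merged.getBytes(StandardCharsets.UTF_8)));
}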

Example 2 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class TestStandardProcessSession, method testContentNotFoundExceptionThrownWhenUnableToReadDataOffsetTooLarge:

@Test
public void testContentNotFoundExceptionThrownWhenUnableToReadDataOffsetTooLarge() {
    final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder().addAttribute("uuid", "12345678-1234-1234-1234-123456789012").entryDate(System.currentTimeMillis()).contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L)).build();
    flowFileQueue.put(flowFileRecord);
    FlowFile ff1 = session.get();
    ff1 = session.write(ff1, new OutputStreamCallback() {

        @Override
        public void process(OutputStream out) throws IOException {
        }
    });
    session.transfer(ff1);
    session.commit();
    final FlowFileRecord flowFileRecord2 = new StandardFlowFileRecord.Builder().addAttribute("uuid", "12345678-1234-1234-1234-123456789012").entryDate(System.currentTimeMillis()).contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L)).contentClaimOffset(1000L).size(1L).build();
    flowFileQueue.put(flowFileRecord2);
    // Attempt to read the data. The first get() re-acquires ff1, which transfer(ff1)
    // routed back to its queue; the second returns flowFileRecord2, whose claim
    // offset (1000) points past the end of the stored content.
    try {
        session.get();
        final FlowFile ff2 = session.get();
        session.read(ff2, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
            }
        });
        Assert.fail("Expected MissingFlowFileException");
    } catch (final MissingFlowFileException mffe) {
        // expected: the claim offset lies beyond the end of the stored content
    }
}
Also used: java.io.ByteArrayInputStream, java.io.ByteArrayOutputStream, java.io.FileInputStream, java.io.FileOutputStream, java.io.FilterOutputStream, java.io.IOException, java.io.InputStream, java.io.OutputStream, org.apache.nifi.controller.repository.claim.StandardContentClaim, org.apache.nifi.flowfile.FlowFile, org.apache.nifi.processor.exception.MissingFlowFileException, org.apache.nifi.processor.io.InputStreamCallback, org.apache.nifi.processor.io.OutputStreamCallback, org.apache.nifi.util.MockFlowFile, org.junit.Test
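On JUnit 4.13 or later, the try/fail/catch idiom can be tightened with Assert.assertThrows; a hedged sketch of the same assertion, assuming the surrounding test fixture:

import static org.junit.Assert.assertThrows;

// Hedged sketch, JUnit 4.13+ only.
session.get(); // drains ff1, which transfer(ff1) routed back to the queue
final FlowFile ff2 = session.get();
assertThrows(MissingFlowFileException.class,
        () -> session.read(ff2, in -> { /* the read itself should fail */ }));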

Example 3 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutCassandraQL, method getCQL:

/**
 * Determines the CQL statement that should be executed for the given FlowFile
 *
 * @param session  the session that can be used to access the given FlowFile
 * @param flowFile the FlowFile whose CQL statement should be executed
 * @return the CQL that is associated with the given FlowFile
 */
private String getCQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
    // Read the CQL from the FlowFile's content
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    // Create the PreparedStatement string to use for this FlowFile.
    return new String(buffer, charset);
}
Also used: java.io.IOException, java.io.InputStream, org.apache.nifi.processor.io.InputStreamCallback
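The same helper in lambda form, as a minimal sketch; like the original, the (int) cast assumes the FlowFile content is smaller than 2 GB:

import java.nio.charset.Charset;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.StreamUtils;

// Hedged sketch: read the full content into a fixed-size buffer, then decode.
private String getCql(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));
    return new String(buffer, charset);
}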

Example 4 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutCouchbaseKey, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final byte[] content = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, content, true);
        }
    });
    String docId = flowFile.getAttribute(CoreAttributes.UUID.key());
    if (!StringUtils.isEmpty(context.getProperty(DOC_ID).getValue())) {
        docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    }
    try {
        Document<?> doc = null;
        final DocumentType documentType = DocumentType.valueOf(context.getProperty(DOCUMENT_TYPE).getValue());
        switch(documentType) {
            case Json:
                {
                    doc = RawJsonDocument.create(docId, new String(content, StandardCharsets.UTF_8));
                    break;
                }
            case Binary:
                {
                    final ByteBuf buf = Unpooled.copiedBuffer(content);
                    doc = BinaryDocument.create(docId, buf);
                    break;
                }
        }
        final PersistTo persistTo = PersistTo.valueOf(context.getProperty(PERSIST_TO).getValue());
        final ReplicateTo replicateTo = ReplicateTo.valueOf(context.getProperty(REPLICATE_TO).getValue());
        doc = openBucket(context).upsert(doc, persistTo, replicateTo);
        final Map<String, String> updatedAttrs = new HashMap<>();
        updatedAttrs.put(CouchbaseAttributes.Cluster.key(), context.getProperty(COUCHBASE_CLUSTER_SERVICE).getValue());
        updatedAttrs.put(CouchbaseAttributes.Bucket.key(), context.getProperty(BUCKET_NAME).getValue());
        updatedAttrs.put(CouchbaseAttributes.DocId.key(), docId);
        updatedAttrs.put(CouchbaseAttributes.Cas.key(), String.valueOf(doc.cas()));
        updatedAttrs.put(CouchbaseAttributes.Expiry.key(), String.valueOf(doc.expiry()));
        flowFile = session.putAllAttributes(flowFile, updatedAttrs);
        session.getProvenanceReporter().send(flowFile, getTransitUrl(context, docId));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final CouchbaseException e) {
        String errMsg = String.format("Writing document %s to Couchbase Server using %s failed due to %s", docId, flowFile, e);
        handleCouchbaseException(context, session, logger, flowFile, e, errMsg);
    }
}
Also used: com.couchbase.client.core.CouchbaseException, com.couchbase.client.deps.io.netty.buffer.ByteBuf, com.couchbase.client.java.PersistTo, com.couchbase.client.java.ReplicateTo, java.io.IOException, java.io.InputStream, java.util.HashMap, org.apache.nifi.flowfile.FlowFile, org.apache.nifi.logging.ComponentLog, org.apache.nifi.processor.io.InputStreamCallback
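An alternative to allocating a buffer and filling it in a callback is ProcessSession.exportTo, which streams the content into any OutputStream; a hedged sketch, still assuming the content is small enough to buffer in memory:

import java.io.ByteArrayOutputStream;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

// Hedged sketch: exportTo(...) copies the FlowFile content without an
// explicit InputStreamCallback.
private byte[] readContent(final ProcessSession session, final FlowFile flowFile) {
    final ByteArrayOutputStream bytes = new ByteArrayOutputStream((int) flowFile.getSize());
    session.exportTo(flowFile, bytes);
    return bytes.toByteArray();
}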

Example 5 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutElasticsearch5, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    final ComponentLog logger = getLogger();
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.warn("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        // For the bulk insert, each document has to be on its own line, so remove all CRLF
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        if (bulk.numberOfActions() > 0) {
            final BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // Responses are guaranteed to be in order, remove them in reverse order
                BulkItemResponse[] responses = response.getItems();
                if (responses != null && responses.length > 0) {
                    for (int i = responses.length - 1; i >= 0; i--) {
                        final BulkItemResponse item = responses[i];
                        final FlowFile flowFile = flowFilesToTransfer.get(item.getItemId());
                        if (item.isFailed()) {
                            logger.warn("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, item.getFailure().getMessage() });
                            session.transfer(flowFile, REL_FAILURE);
                        } else {
                            session.getProvenanceReporter().send(flowFile, response.remoteAddress().getAddress());
                            session.transfer(flowFile, REL_SUCCESS);
                        }
                        flowFilesToTransfer.remove(flowFile);
                    }
                }
            }
            // Transfer any remaining flowfiles to success
            for (FlowFile ff : flowFilesToTransfer) {
                session.getProvenanceReporter().send(ff, response.remoteAddress().getAddress());
                session.transfer(ff, REL_SUCCESS);
            }
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
Also used: java.io.IOException, java.io.InputStream, java.nio.charset.Charset, java.util.LinkedList, org.apache.nifi.flowfile.FlowFile, org.apache.nifi.logging.ComponentLog, org.apache.nifi.processor.exception.ProcessException, org.apache.nifi.processor.io.InputStreamCallback, org.elasticsearch.ElasticsearchTimeoutException, org.elasticsearch.action.bulk.BulkItemResponse, org.elasticsearch.action.bulk.BulkRequestBuilder, org.elasticsearch.action.bulk.BulkResponse, org.elasticsearch.client.transport.NoNodeAvailableException, org.elasticsearch.node.NodeClosedException, org.elasticsearch.transport.ReceiveTimeoutTransportException
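The comment inside the read callback exists because Elasticsearch's bulk API is newline-delimited: each document must occupy exactly one line of the payload. A small hedged helper isolating that sanitization step:

// Hedged sketch: flatten embedded line breaks so each document stays on a
// single line of the newline-delimited bulk request.
private static String toSingleLine(final String json) {
    return json.replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
}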

Aggregations

IOException (java.io.IOException): 80 usages
InputStream (java.io.InputStream): 80 usages
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 80 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 62 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 35 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 27 usages
HashMap (java.util.HashMap): 25 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 23 usages
OutputStream (java.io.OutputStream): 19 usages
BufferedInputStream (java.io.BufferedInputStream): 18 usages
ArrayList (java.util.ArrayList): 17 usages
Map (java.util.Map): 17 usages
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 13 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11 usages
BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 10 usages
StopWatch (org.apache.nifi.util.StopWatch): 10 usages
HashSet (java.util.HashSet): 9 usages
Charset (java.nio.charset.Charset): 8 usages
FileInputStream (java.io.FileInputStream): 7 usages
ProcessSession (org.apache.nifi.processor.ProcessSession): 7 usages
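The AtomicReference count above reflects a recurring pattern: locals referenced from inside the callback must be effectively final, so processors typically carry the result out through an AtomicReference. A minimal sketch, assuming commons-io is available:

import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

// Hedged sketch: capture the content in an AtomicReference so it can
// escape the callback's scope.
private String readToString(final ProcessSession session, final FlowFile flowFile) {
    final AtomicReference<String> contentRef = new AtomicReference<>();
    session.read(flowFile, in -> contentRef.set(IOUtils.toString(in, StandardCharsets.UTF_8)));
    return contentRef.get();
}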