Use of org.apache.nifi.processor.io.InputStreamCallback in project kylo by Teradata: class MergeHiveTableMetadata, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        final String databaseNameField = context.getProperty(DATABASE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String databaseOwnerField = context.getProperty(DATABASE_OWNER).evaluateAttributeExpressions(flowFile).getValue();
        final String tableCreateTimeField = context.getProperty(TABLE_CREATE_TIME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableNameField = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String tableTypeField = context.getProperty(TABLE_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnNameField = context.getProperty(COLUMN_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String columnTypeField = context.getProperty(COLUMN_TYPE).evaluateAttributeExpressions(flowFile).getValue();
        final String columnCommentField = context.getProperty(COLUMN_COMMENT).evaluateAttributeExpressions(flowFile).getValue();
        final StringBuffer sb = new StringBuffer();
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                sb.append(IOUtils.toString(in, Charset.defaultCharset()));
            }
        });
        logger.debug("The json that was received is: " + sb.toString());
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    JSONArray array = new JSONArray(sb.toString());
                    Map<String, Metadata> tables = new HashMap<>();
                    for (int i = 0; i < array.length(); i++) {
                        JSONObject jsonObj = array.getJSONObject(i);
                        String databaseName = jsonObj.getString(databaseNameField);
                        String databaseOwner = jsonObj.getString(databaseOwnerField);
                        String tableName = jsonObj.getString(tableNameField);
                        String tableCreateTime = jsonObj.getString(tableCreateTimeField);
                        String tableType = jsonObj.getString(tableTypeField);
                        String columnName = jsonObj.getString(columnNameField);
                        String columnType = jsonObj.getString(columnTypeField);
                        String columnComment = jsonObj.getString(columnCommentField);
                        String key = databaseName + tableName;
                        if (tables.containsKey(key)) {
                            Metadata meta = tables.get(key);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                        } else {
                            Metadata meta = new Metadata();
                            meta.setDatabaseName(databaseName);
                            meta.setDatabaseOwner(databaseOwner);
                            meta.setTableCreateTime(tableCreateTime);
                            meta.setTableName(tableName);
                            meta.setTableType(tableType);
                            HiveColumn column = new HiveColumn();
                            column.setColumnName(columnName);
                            column.setColumnType(columnType);
                            column.setColumnComment(columnComment);
                            meta.getHiveColumns().add(column);
                            tables.put(key, meta);
                        }
                    }
                    List<Metadata> tablesAsList = new ArrayList<>();
                    Iterator iter = tables.entrySet().iterator();
                    while (iter.hasNext()) {
                        Map.Entry pair = (Map.Entry) iter.next();
                        tablesAsList.add((Metadata) pair.getValue());
                    }
                    Gson gson = new Gson();
                    JsonElement element = gson.toJsonTree(tablesAsList, new TypeToken<List<Metadata>>() {
                    }.getType());
                    JsonArray jsonArray = element.getAsJsonArray();
                    out.write(jsonArray.toString().getBytes());
                } catch (final Exception e) {
                    throw new ProcessException(e);
                }
            }
        });
        logger.info("*** Completed with status ");
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        logger.error("Unable to execute merge hive json job", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
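The read step above follows a common pre-lambda idiom: because ProcessSession.read(...) is synchronous, the content can be captured into a holder that the anonymous InputStreamCallback closes over. A minimal, self-contained sketch of that idiom under stated assumptions (readContentAsString and the UTF-8 charset are illustrative choices, not part of the kylo code):

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;

// Hypothetical helper: read a FlowFile's content into a String via InputStreamCallback.
// A final StringBuilder works as the holder because the callback completes before
// session.read(...) returns.
private String readContentAsString(final ProcessSession session, final FlowFile flowFile) {
    final StringBuilder holder = new StringBuilder();
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            holder.append(IOUtils.toString(in, StandardCharsets.UTF_8));
        }
    });
    return holder.toString();
}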
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache: class TestStandardProcessSession, method testContentNotFoundExceptionThrownWhenUnableToReadDataOffsetTooLarge.
@Test
public void testContentNotFoundExceptionThrownWhenUnableToReadDataOffsetTooLarge() {
    final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder()
        .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
        .entryDate(System.currentTimeMillis())
        .contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L))
        .build();
    flowFileQueue.put(flowFileRecord);
    FlowFile ff1 = session.get();
    ff1 = session.write(ff1, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
        }
    });
    session.transfer(ff1);
    session.commit();
    final FlowFileRecord flowFileRecord2 = new StandardFlowFileRecord.Builder()
        .addAttribute("uuid", "12345678-1234-1234-1234-123456789012")
        .entryDate(System.currentTimeMillis())
        .contentClaim(new StandardContentClaim(resourceClaimManager.newResourceClaim("x", "x", "0", true, false), 0L))
        .contentClaimOffset(1000L)
        .size(1L)
        .build();
    flowFileQueue.put(flowFileRecord2);
    // attempt to read the data.
    try {
        session.get();
        final FlowFile ff2 = session.get();
        session.read(ff2, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
            }
        });
        Assert.fail("Expected MissingFlowFileException");
    } catch (final MissingFlowFileException mffe) {
    }
}
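Assuming a JUnit 4.13 or later dependency (an assumption, not something the test above declares), the fail-then-empty-catch pattern can be written more compactly with Assert.assertThrows. A sketch of the equivalent assertion only:

// Sketch only: equivalent assertion using Assert.assertThrows (JUnit 4.13+).
final FlowFile ff2 = session.get();
Assert.assertThrows(MissingFlowFileException.class, () -> session.read(ff2, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        // The read is expected to fail before this callback receives any data.
    }
}));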
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache: class PutCassandraQL, method getCQL.
/**
 * Determines the CQL statement that should be executed for the given FlowFile
 *
 * @param session the session that can be used to access the given FlowFile
 * @param flowFile the FlowFile whose CQL statement should be executed
 * @return the CQL that is associated with the given FlowFile
 */
private String getCQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
    // Read the CQL from the FlowFile's content
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    // Create the PreparedStatement string to use for this FlowFile.
    return new String(buffer, charset);
}
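Newer NiFi releases also expose a ProcessSession.read(FlowFile) overload that returns an InputStream directly, which removes the need for the callback. A hedged sketch of the same helper under that assumption (illustrative only, not the code PutCassandraQL actually ships):

// Sketch, assuming a NiFi version whose ProcessSession provides read(FlowFile)
// returning an InputStream; try-with-resources closes the stream.
private String getCqlWithoutCallback(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    try (final InputStream in = session.read(flowFile)) {
        StreamUtils.fillBuffer(in, buffer);
    } catch (final IOException e) {
        throw new ProcessException("Failed to read CQL content of " + flowFile, e);
    }
    return new String(buffer, charset);
}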
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache: class PutCouchbaseKey, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final byte[] content = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, content, true);
        }
    });
    String docId = flowFile.getAttribute(CoreAttributes.UUID.key());
    if (!StringUtils.isEmpty(context.getProperty(DOC_ID).getValue())) {
        docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    }
    try {
        Document<?> doc = null;
        final DocumentType documentType = DocumentType.valueOf(context.getProperty(DOCUMENT_TYPE).getValue());
        switch (documentType) {
            case Json: {
                doc = RawJsonDocument.create(docId, new String(content, StandardCharsets.UTF_8));
                break;
            }
            case Binary: {
                final ByteBuf buf = Unpooled.copiedBuffer(content);
                doc = BinaryDocument.create(docId, buf);
                break;
            }
        }
        final PersistTo persistTo = PersistTo.valueOf(context.getProperty(PERSIST_TO).getValue());
        final ReplicateTo replicateTo = ReplicateTo.valueOf(context.getProperty(REPLICATE_TO).getValue());
        doc = openBucket(context).upsert(doc, persistTo, replicateTo);
        final Map<String, String> updatedAttrs = new HashMap<>();
        updatedAttrs.put(CouchbaseAttributes.Cluster.key(), context.getProperty(COUCHBASE_CLUSTER_SERVICE).getValue());
        updatedAttrs.put(CouchbaseAttributes.Bucket.key(), context.getProperty(BUCKET_NAME).getValue());
        updatedAttrs.put(CouchbaseAttributes.DocId.key(), docId);
        updatedAttrs.put(CouchbaseAttributes.Cas.key(), String.valueOf(doc.cas()));
        updatedAttrs.put(CouchbaseAttributes.Expiry.key(), String.valueOf(doc.expiry()));
        flowFile = session.putAllAttributes(flowFile, updatedAttrs);
        session.getProvenanceReporter().send(flowFile, getTransitUrl(context, docId));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final CouchbaseException e) {
        String errMsg = String.format("Writing document %s to Couchbase Server using %s failed due to %s", docId, flowFile, e);
        handleCouchbaseException(context, session, logger, flowFile, e, errMsg);
    }
}
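The document-construction switch above could be isolated into a small helper. The following is an illustrative sketch only (createDocument is a hypothetical name; the Couchbase Java SDK 2.x types are the same ones the processor already uses):

// Hypothetical helper: build a Couchbase Document from the content bytes read by the callback.
private Document<?> createDocument(final DocumentType documentType, final String docId, final byte[] content) {
    switch (documentType) {
        case Json:
            return RawJsonDocument.create(docId, new String(content, StandardCharsets.UTF_8));
        case Binary:
            return BinaryDocument.create(docId, Unpooled.copiedBuffer(content));
        default:
            throw new IllegalArgumentException("Unsupported document type: " + documentType);
    }
}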
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache: class PutElasticsearch5, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    final ComponentLog logger = getLogger();
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.warn("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {
                    @Override
                    public void process(final InputStream in) throws IOException {
                        // For the bulk insert, each document has to be on its own line, so remove all CRLF
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        if (bulk.numberOfActions() > 0) {
            final BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // Responses are guaranteed to be in order, remove them in reverse order
                BulkItemResponse[] responses = response.getItems();
                if (responses != null && responses.length > 0) {
                    for (int i = responses.length - 1; i >= 0; i--) {
                        final BulkItemResponse item = responses[i];
                        final FlowFile flowFile = flowFilesToTransfer.get(item.getItemId());
                        if (item.isFailed()) {
                            logger.warn("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, item.getFailure().getMessage() });
                            session.transfer(flowFile, REL_FAILURE);
                        } else {
                            session.getProvenanceReporter().send(flowFile, response.remoteAddress().getAddress());
                            session.transfer(flowFile, REL_SUCCESS);
                        }
                        flowFilesToTransfer.remove(flowFile);
                    }
                }
            }
            // Transfer any remaining flowfiles to success
            for (FlowFile ff : flowFilesToTransfer) {
                session.getProvenanceReporter().send(ff, response.remoteAddress().getAddress());
                session.transfer(ff, REL_SUCCESS);
            }
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
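Note the line-flattening inside the callback: the Elasticsearch bulk API treats newlines as action separators, so each document must occupy a single line. The same read could also capture the content first and build the bulk entry outside the I/O callback; a hedged sketch of that variant (jsonHolder is an illustrative name, the other variables are the ones from the loop above, and only the "index" branch is shown):

// Sketch: capture the FlowFile content inside the callback, then build the
// bulk entry afterwards, keeping Elasticsearch client calls out of the I/O path.
final StringBuilder jsonHolder = new StringBuilder();
session.read(file, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        jsonHolder.append(IOUtils.toString(in, charset));
    }
});
// Flatten line breaks so the document sits on one line, as the bulk protocol requires.
final String json = jsonHolder.toString().replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
if ("index".equalsIgnoreCase(indexOp)) {
    bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
}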