Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
From the ValidateXml class, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final List<FlowFile> flowFiles = session.get(50);
    if (flowFiles.isEmpty()) {
        return;
    }
    final Schema schema = schemaRef.get();
    final Validator validator = schema.newValidator();
    final ComponentLog logger = getLogger();
    for (FlowFile flowFile : flowFiles) {
        final AtomicBoolean valid = new AtomicBoolean(true);
        final AtomicReference<Exception> exception = new AtomicReference<Exception>(null);
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try {
                    validator.validate(new StreamSource(in));
                } catch (final IllegalArgumentException | SAXException e) {
                    valid.set(false);
                    exception.set(e);
                }
            }
        });
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            flowFile = session.putAttribute(flowFile, ERROR_ATTRIBUTE_KEY, exception.get().getLocalizedMessage());
            logger.info("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile, exception.get().getLocalizedMessage() });
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    }
}
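For comparison, here is a minimal, hypothetical helper (not part of ValidateXml) showing the same read-and-flag pattern with a Java 8 lambda; because InputStreamCallback declares the single method process(InputStream), the anonymous class above can be written as a lambda. The class and method names are illustrative.

import java.io.InputStream;
import java.util.concurrent.atomic.AtomicReference;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.xml.sax.SAXException;

class XmlValidationSketch {

    // Returns null when the FlowFile content validates against the Schema,
    // otherwise returns the exception that caused validation to fail.
    static Exception validate(final ProcessSession session, final FlowFile flowFile, final Schema schema) {
        final AtomicReference<Exception> failure = new AtomicReference<>(null);
        session.read(flowFile, (final InputStream in) -> {
            try {
                // Validator instances are not thread-safe, so a fresh one is created per call
                schema.newValidator().validate(new StreamSource(in));
            } catch (final SAXException | IllegalArgumentException e) {
                failure.set(e);
            }
        });
        return failure.get();
    }
}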
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
From the AbstractRouteRecord class, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final T flowFileContext;
    try {
        flowFileContext = getFlowFileContext(flowFile, context);
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final AtomicInteger numRecords = new AtomicInteger(0);
    final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    Record record;
                    while ((record = reader.nextRecord()) != null) {
                        final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
                        numRecords.incrementAndGet();
                        for (final Relationship relationship : relationships) {
                            final RecordSetWriter recordSetWriter;
                            Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
                            if (tuple == null) {
                                FlowFile outFlowFile = session.create(original);
                                final OutputStream out = session.write(outFlowFile);
                                recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
                                recordSetWriter.beginRecordSet();
                                tuple = new Tuple<>(outFlowFile, recordSetWriter);
                                writers.put(relationship, tuple);
                            } else {
                                recordSetWriter = tuple.getValue();
                            }
                            recordSetWriter.write(record);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
        for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
            final Relationship relationship = entry.getKey();
            final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
            final RecordSetWriter writer = tuple.getValue();
            FlowFile childFlowFile = tuple.getKey();
            final WriteResult writeResult = writer.finishRecordSet();
            try {
                writer.close();
            } catch (final IOException ioe) {
                getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile });
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.putAll(writeResult.getAttributes());
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.transfer(childFlowFile, relationship);
            session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
            session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);
            session.getProvenanceReporter().route(childFlowFile, relationship);
        }
    } catch (final Exception e) {
        getLogger().error("Failed to process {}", new Object[] { flowFile, e });
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            try {
                tuple.getValue().close();
            } catch (final Exception e1) {
                getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey() });
            }
            session.remove(tuple.getKey());
        }
        session.transfer(flowFile, REL_FAILURE);
        return;
    } finally {
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            final RecordSetWriter writer = tuple.getValue();
            try {
                writer.close();
            } catch (final Exception e) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
            }
        }
    }
    if (isRouteOriginal()) {
        flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
        session.transfer(flowFile, REL_ORIGINAL);
    } else {
        session.remove(flowFile);
    }
    getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
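The central idiom in this snippet is lazily opening one child FlowFile and RecordSetWriter per relationship the first time a record is routed there, keyed in the writers map. Below is a sketch of just that lookup, assuming the same NiFi 1.x record API the snippet itself relies on (createWriter(ComponentLog, RecordSchema, OutputStream) and its checked exceptions); the helper class and method names are hypothetical.

import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.util.Tuple;

class RouteWriterSupport {

    // Returns the writer already open for this relationship, or lazily creates a child
    // FlowFile plus writer and begins its record set, mirroring the tuple bookkeeping above.
    static RecordSetWriter getOrCreateWriter(final ProcessSession session, final FlowFile original,
            final Relationship relationship, final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers,
            final RecordSetWriterFactory writerFactory, final RecordSchema writeSchema, final ComponentLog logger)
            throws IOException, SchemaNotFoundException {

        Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
        if (tuple == null) {
            final FlowFile child = session.create(original);   // child inherits attributes and lineage
            final OutputStream out = session.write(child);     // stream stays open until the writer is closed
            final RecordSetWriter writer = writerFactory.createWriter(logger, writeSchema, out);
            writer.beginRecordSet();
            tuple = new Tuple<>(child, writer);
            writers.put(relationship, tuple);
        }
        return tuple.getValue();
    }
}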
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
From the ConvertJSONToSQL class, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final boolean translateFieldNames = context.getProperty(TRANSLATE_FIELD_NAMES).asBoolean();
    final boolean ignoreUnmappedFields = IGNORE_UNMATCHED_FIELD.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_FIELD_BEHAVIOR).getValue());
    final String statementType = context.getProperty(STATEMENT_TYPE).getValue();
    final String updateKeys = context.getProperty(UPDATE_KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String catalog = context.getProperty(CATALOG_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String schemaName = context.getProperty(SCHEMA_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final SchemaKey schemaKey = new SchemaKey(catalog, tableName);
    final boolean includePrimaryKeys = UPDATE_TYPE.equals(statementType) && updateKeys == null;
    // Is the unmatched column behaviour fail or warning?
    final boolean failUnmappedColumns = FAIL_UNMATCHED_COLUMN.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_COLUMN_BEHAVIOR).getValue());
    final boolean warningUnmappedColumns = WARNING_UNMATCHED_COLUMN.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_COLUMN_BEHAVIOR).getValue());
    // Escape column names?
    final boolean escapeColumnNames = context.getProperty(QUOTED_IDENTIFIERS).asBoolean();
    // Quote table name?
    final boolean quoteTableName = context.getProperty(QUOTED_TABLE_IDENTIFIER).asBoolean();
    // Attribute prefix
    final String attributePrefix = context.getProperty(SQL_PARAM_ATTR_PREFIX).evaluateAttributeExpressions(flowFile).getValue();
    // get the database schema from the cache, if one exists. We do this in a synchronized block, rather than
    // using a ConcurrentMap because the Map that we are using is a LinkedHashMap with a capacity such that if
    // the Map grows beyond this capacity, old elements are evicted. We do this in order to avoid filling the
    // Java Heap if there are a lot of different SQL statements being generated that reference different tables.
    TableSchema schema;
    synchronized (this) {
        schema = schemaCache.get(schemaKey);
        if (schema == null) {
            // No schema exists for this table yet. Query the database to determine the schema and put it into the cache.
            final DBCPService dbcpService = context.getProperty(CONNECTION_POOL).asControllerService(DBCPService.class);
            try (final Connection conn = dbcpService.getConnection()) {
                schema = TableSchema.from(conn, catalog, schemaName, tableName, translateFieldNames, includePrimaryKeys);
                schemaCache.put(schemaKey, schema);
            } catch (final SQLException e) {
                getLogger().error("Failed to convert {} into a SQL statement due to {}; routing to failure", new Object[] { flowFile, e.toString() }, e);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    }
    // Parse the JSON document
    final ObjectMapper mapper = new ObjectMapper();
    final AtomicReference<JsonNode> rootNodeRef = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                    rootNodeRef.set(mapper.readTree(bufferedIn));
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to parse {} as JSON due to {}; routing to failure", new Object[] { flowFile, pe.toString() }, pe);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final JsonNode rootNode = rootNodeRef.get();
    // The node may or may not be a Json Array. If it isn't, we will create an
    // ArrayNode and add just the root node to it. We do this so that we can easily iterate
    // over the array node, rather than duplicating the logic or creating another function that takes many variables
    // in order to implement the logic.
    final ArrayNode arrayNode;
    if (rootNode.isArray()) {
        arrayNode = (ArrayNode) rootNode;
    } else {
        final JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
        arrayNode = new ArrayNode(nodeFactory);
        arrayNode.add(rootNode);
    }
    final String fragmentIdentifier = UUID.randomUUID().toString();
    final Set<FlowFile> created = new HashSet<>();
    for (int i = 0; i < arrayNode.size(); i++) {
        final JsonNode jsonNode = arrayNode.get(i);
        final String sql;
        final Map<String, String> attributes = new HashMap<>();
        try {
            // build the fully qualified table name
            final StringBuilder tableNameBuilder = new StringBuilder();
            if (catalog != null) {
                tableNameBuilder.append(catalog).append(".");
            }
            if (schemaName != null) {
                tableNameBuilder.append(schemaName).append(".");
            }
            tableNameBuilder.append(tableName);
            final String fqTableName = tableNameBuilder.toString();
            if (INSERT_TYPE.equals(statementType)) {
                sql = generateInsert(jsonNode, attributes, fqTableName, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            } else if (UPDATE_TYPE.equals(statementType)) {
                sql = generateUpdate(jsonNode, attributes, fqTableName, updateKeys, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            } else {
                sql = generateDelete(jsonNode, attributes, fqTableName, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            }
        } catch (final ProcessException pe) {
            getLogger().error("Failed to convert {} to a SQL {} statement due to {}; routing to failure", new Object[] { flowFile, statementType, pe.toString() }, pe);
            session.remove(created);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        FlowFile sqlFlowFile = session.create(flowFile);
        created.add(sqlFlowFile);
        sqlFlowFile = session.write(sqlFlowFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(sql.getBytes(StandardCharsets.UTF_8));
            }
        });
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put(attributePrefix + ".table", tableName);
        attributes.put(FRAGMENT_ID.key(), fragmentIdentifier);
        attributes.put(FRAGMENT_COUNT.key(), String.valueOf(arrayNode.size()));
        attributes.put(FRAGMENT_INDEX.key(), String.valueOf(i));
        if (catalog != null) {
            attributes.put(attributePrefix + ".catalog", catalog);
        }
        sqlFlowFile = session.putAllAttributes(sqlFlowFile, attributes);
        session.transfer(sqlFlowFile, REL_SQL);
    }
    flowFile = copyAttributesToOriginal(session, flowFile, fragmentIdentifier, arrayNode.size());
    session.transfer(flowFile, REL_ORIGINAL);
}
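The read step above collects the whole FlowFile into a Jackson tree through an InputStreamCallback. A standalone sketch of that step, written as a lambda and shown here with the Jackson 2 (com.fasterxml) API rather than whichever Jackson version the processor itself imports; the helper class name is illustrative.

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

class FlowFileJsonReader {

    // Reads the FlowFile content and returns it as a JSON tree.
    // Any IOException thrown inside the callback is wrapped by the framework in a
    // ProcessException, which is why the caller above catches ProcessException.
    static JsonNode readTree(final ProcessSession session, final FlowFile flowFile) {
        final ObjectMapper mapper = new ObjectMapper();
        final AtomicReference<JsonNode> rootRef = new AtomicReference<>(null);
        session.read(flowFile, (final InputStream in) -> {
            try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                rootRef.set(mapper.readTree(bufferedIn));
            }
        });
        return rootRef.get();
    }
}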
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
From the ExtractGrok class, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
    final String contentString;
    byte[] buffer = bufferQueue.poll();
    if (buffer == null) {
        final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
        buffer = new byte[maxBufferSize];
    }
    try {
        final byte[] byteBuffer = buffer;
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, byteBuffer, false);
            }
        });
        final long len = Math.min(byteBuffer.length, flowFile.getSize());
        contentString = new String(byteBuffer, 0, (int) len, charset);
    } finally {
        bufferQueue.offer(buffer);
    }
    final Match gm = grok.match(contentString);
    gm.captures();
    if (gm.toMap().isEmpty()) {
        session.transfer(flowFile, REL_NO_MATCH);
        getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[] { flowFile });
        return;
    }
    final ObjectMapper objectMapper = new ObjectMapper();
    switch (context.getProperty(DESTINATION).getValue()) {
        case FLOWFILE_ATTRIBUTE:
            Map<String, String> grokResults = new HashMap<>();
            for (Map.Entry<String, Object> entry : gm.toMap().entrySet()) {
                if (null != entry.getValue()) {
                    grokResults.put("grok." + entry.getKey(), entry.getValue().toString());
                }
            }
            flowFile = session.putAllAttributes(flowFile, grokResults);
            session.getProvenanceReporter().modifyAttributes(flowFile);
            session.transfer(flowFile, REL_MATCH);
            getLogger().info("Matched {} Grok Expressions and added attributes to FlowFile {}", new Object[] { grokResults.size(), flowFile });
            break;
        case FLOWFILE_CONTENT:
            FlowFile conFlowfile = session.write(flowFile, new StreamCallback() {
                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    out.write(objectMapper.writeValueAsBytes(gm.toMap()));
                }
            });
            conFlowfile = session.putAttribute(conFlowfile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
            session.getProvenanceReporter().modifyContent(conFlowfile, "Replaced content with parsed Grok fields and values", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            session.transfer(conFlowfile, REL_MATCH);
            break;
    }
}
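The read here fills a pooled byte buffer rather than streaming, so only the first MAX_BUFFER_SIZE bytes of content are matched. A minimal sketch of that bounded read, using org.apache.nifi.stream.io.StreamUtils as the snippet does; the helper class and method names are made up.

import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.StreamUtils;

class BoundedContentReader {

    // Reads at most buffer.length bytes of the FlowFile content and decodes them with the
    // given charset. Passing false to fillBuffer means a FlowFile smaller than the buffer
    // is not treated as an error; the unread tail of the buffer is simply left untouched.
    static String readUpTo(final ProcessSession session, final FlowFile flowFile,
                           final byte[] buffer, final Charset charset) {
        session.read(flowFile, (final InputStream in) -> StreamUtils.fillBuffer(in, buffer, false));
        final int length = (int) Math.min(buffer.length, flowFile.getSize());
        return new String(buffer, 0, length, charset);
    }
}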
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
From the PutSolrContentStream class, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final AtomicReference<Exception> error = new AtomicReference<>(null);
    final AtomicReference<Exception> connectionError = new AtomicReference<>(null);
    final boolean isSolrCloud = SOLR_TYPE_CLOUD.equals(context.getProperty(SOLR_TYPE).getValue());
    final String collection = context.getProperty(COLLECTION).evaluateAttributeExpressions(flowFile).getValue();
    final Long commitWithin = context.getProperty(COMMIT_WITHIN).evaluateAttributeExpressions(flowFile).asLong();
    final String contentStreamPath = context.getProperty(CONTENT_STREAM_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final MultiMapSolrParams requestParams = new MultiMapSolrParams(getRequestParams(context, flowFile));
    StopWatch timer = new StopWatch(true);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            ContentStreamUpdateRequest request = new ContentStreamUpdateRequest(contentStreamPath);
            request.setParams(new ModifiableSolrParams());
            // add the extra params, don't use 'set' in case of repeating params
            Iterator<String> paramNames = requestParams.getParameterNamesIterator();
            while (paramNames.hasNext()) {
                String paramName = paramNames.next();
                for (String paramValue : requestParams.getParams(paramName)) {
                    request.getParams().add(paramName, paramValue);
                }
            }
            // specify the collection for SolrCloud
            if (isSolrCloud) {
                request.setParam(COLLECTION_PARAM_NAME, collection);
            }
            if (commitWithin != null && commitWithin > 0) {
                request.setParam(COMMIT_WITHIN_PARAM_NAME, commitWithin.toString());
            }
            // if a username and password were provided then pass them for basic auth
            if (isBasicAuthEnabled()) {
                request.setBasicAuthCredentials(getUsername(), getPassword());
            }
            try (final BufferedInputStream bufferedIn = new BufferedInputStream(in)) {
                // add the FlowFile's content on the UpdateRequest
                request.addContentStream(new ContentStreamBase() {
                    @Override
                    public InputStream getStream() throws IOException {
                        return bufferedIn;
                    }

                    @Override
                    public String getContentType() {
                        return context.getProperty(CONTENT_TYPE).evaluateAttributeExpressions().getValue();
                    }
                });
                UpdateResponse response = request.process(getSolrClient());
                getLogger().debug("Got {} response from Solr", new Object[] { response.getStatus() });
            } catch (SolrException e) {
                error.set(e);
            } catch (SolrServerException e) {
                if (causedByIOException(e)) {
                    connectionError.set(e);
                } else {
                    error.set(e);
                }
            } catch (IOException e) {
                connectionError.set(e);
            }
        }
    });
    timer.stop();
    if (error.get() != null) {
        getLogger().error("Failed to send {} to Solr due to {}; routing to failure", new Object[] { flowFile, error.get() });
        session.transfer(flowFile, REL_FAILURE);
    } else if (connectionError.get() != null) {
        getLogger().error("Failed to send {} to Solr due to {}; routing to connection_failure", new Object[] { flowFile, connectionError.get() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_CONNECTION_FAILURE);
    } else {
        StringBuilder transitUri = new StringBuilder("solr://");
        transitUri.append(getSolrLocation());
        if (isSolrCloud) {
            transitUri.append(":").append(collection);
        }
        final long duration = timer.getDuration(TimeUnit.MILLISECONDS);
        session.getProvenanceReporter().send(flowFile, transitUri.toString(), duration, true);
        getLogger().info("Successfully sent {} to Solr in {} millis", new Object[] { flowFile, duration });
        session.transfer(flowFile, REL_SUCCESS);
    }
}
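The routing at the end separates Solr-side errors (REL_FAILURE) from connectivity problems (REL_CONNECTION_FAILURE, after penalizing the FlowFile). causedByIOException is the processor's own private helper and is not shown above; a plausible sketch of such a check, walking the cause chain, might look like this.

import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;

final class SolrErrorClassifier {

    // Hypothetical re-implementation: true if any link in the cause chain is an IOException,
    // which the processor treats as a connectivity problem rather than a data problem.
    static boolean causedByIOException(final SolrServerException e) {
        Throwable cause = e;
        while (cause != null) {
            if (cause instanceof IOException) {
                return true;
            }
            cause = cause.getCause();
        }
        return false;
    }
}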