Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.
The class GetSolr, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final ComponentLog logger = getLogger();
final AtomicBoolean continuePaging = new AtomicBoolean(true);
final SolrQuery solrQuery = new SolrQuery();
try {
if (id_field == null) {
id_field = getFieldNameOfUniqueKey();
}
final String dateField = context.getProperty(DATE_FIELD).getValue();
final Map<String, String> stateMap = new HashMap<String, String>();
stateMap.putAll(context.getStateManager().getState(Scope.CLUSTER).toMap());
solrQuery.setQuery("*:*");
final String query = context.getProperty(SOLR_QUERY).getValue();
if (!StringUtils.isBlank(query) && !query.equals("*:*")) {
solrQuery.addFilterQuery(query);
}
final StringBuilder automatedFilterQuery = (new StringBuilder()).append(dateField).append(":[").append(stateMap.get(STATE_MANAGER_FILTER)).append(" TO *]");
solrQuery.addFilterQuery(automatedFilterQuery.toString());
final List<String> fieldList = new ArrayList<String>();
final String returnFields = context.getProperty(RETURN_FIELDS).getValue();
if (!StringUtils.isBlank(returnFields)) {
fieldList.addAll(Arrays.asList(returnFields.trim().split("[,]")));
if (!fieldList.contains(dateField)) {
fieldList.add(dateField);
dateFieldNotInSpecifiedFieldsList.set(true);
}
for (String returnField : fieldList) {
solrQuery.addField(returnField.trim());
}
}
solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, stateMap.get(STATE_MANAGER_CURSOR_MARK));
solrQuery.setRows(context.getProperty(BATCH_SIZE).asInteger());
final StringBuilder sortClause = (new StringBuilder()).append(dateField).append(" asc,").append(id_field).append(" asc");
solrQuery.setParam("sort", sortClause.toString());
while (continuePaging.get()) {
final QueryRequest req = new QueryRequest(solrQuery);
if (isBasicAuthEnabled()) {
req.setBasicAuthCredentials(getUsername(), getPassword());
}
logger.debug(solrQuery.toQueryString());
final QueryResponse response = req.process(getSolrClient());
final SolrDocumentList documentList = response.getResults();
if (response.getResults().size() > 0) {
final SolrDocument lastSolrDocument = documentList.get(response.getResults().size() - 1);
final String latestDateValue = df.format(lastSolrDocument.get(dateField));
final String newCursorMark = response.getNextCursorMark();
solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, newCursorMark);
stateMap.put(STATE_MANAGER_CURSOR_MARK, newCursorMark);
stateMap.put(STATE_MANAGER_FILTER, latestDateValue);
FlowFile flowFile = session.create();
flowFile = session.putAttribute(flowFile, "solrQuery", solrQuery.toString());
if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())) {
if (dateFieldNotInSpecifiedFieldsList.get()) {
for (SolrDocument doc : response.getResults()) {
doc.removeFields(dateField);
}
}
flowFile = session.write(flowFile, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response));
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/xml");
} else {
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final RecordSchema schema = writerFactory.getSchema(null, null);
final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema);
final StringBuffer mimeType = new StringBuffer();
flowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
try {
final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out);
writer.write(recordSet);
writer.flush();
mimeType.append(writer.getMimeType());
} catch (SchemaNotFoundException e) {
throw new ProcessException("Could not parse Solr response", e);
}
}
});
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType.toString());
}
session.transfer(flowFile, REL_SUCCESS);
}
continuePaging.set(response.getResults().size() == Integer.parseInt(context.getProperty(BATCH_SIZE).getValue()));
}
context.getStateManager().setState(stateMap, Scope.CLUSTER);
} catch (SolrServerException | SchemaNotFoundException | IOException e) {
context.yield();
session.rollback();
logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), e }, e);
throw new ProcessException(e);
} catch (final Throwable t) {
context.yield();
session.rollback();
logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), t }, t);
throw t;
}
}
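The non-XML branch above shows the basic RecordSetWriter flow: ask the RecordSetWriterFactory for a schema, create a writer over the FlowFile's output stream, write the whole RecordSet, flush, and record the writer's MIME type on the FlowFile. Below is a minimal sketch of that flow using only the calls seen above; the helper class, method name, and error wrapping are illustrative assumptions, not NiFi API or GetSolr code.

import java.io.IOException;
import java.io.OutputStream;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;

class RecordSetToFlowFileSketch {

    // Illustrative helper: serialize a RecordSet into a FlowFile and tag it with the writer's MIME type.
    FlowFile writeRecordSet(final ProcessSession session, FlowFile flowFile, final RecordSetWriterFactory writerFactory,
                            final RecordSet recordSet, final ComponentLog logger) {
        final StringBuilder mimeType = new StringBuilder();
        flowFile = session.write(flowFile, (final OutputStream out) -> {
            try {
                // getSchema(null, null) mirrors the GetSolr usage above; the writer derives its own schema
                final RecordSchema schema = writerFactory.getSchema(null, null);
                try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, out)) {
                    writer.write(recordSet);
                    writer.flush();
                    mimeType.append(writer.getMimeType());
                }
            } catch (final IOException e) {
                throw e;
            } catch (final Exception e) {
                // e.g. a SchemaNotFoundException from the factory
                throw new ProcessException("Could not write record set", e);
            }
        });
        return session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType.toString());
    }
}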
Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.
The class AbstractFetchHDFSRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
// do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
final FileSystem fileSystem = getFileSystem();
final Configuration configuration = getConfiguration();
final UserGroupInformation ugi = getUserGroupInformation();
if (configuration == null || fileSystem == null || ugi == null) {
getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
context.yield();
return;
}
final FlowFile originalFlowFile = session.get();
if (originalFlowFile == null) {
context.yield();
return;
}
ugi.doAs((PrivilegedAction<Object>) () -> {
FlowFile child = null;
final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
try {
final Path path = new Path(filenameValue);
final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final StopWatch stopWatch = new StopWatch(true);
// use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
child = session.create(originalFlowFile);
final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
child = session.write(child, (final OutputStream rawOut) -> {
try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {
Record record = recordReader.nextRecord();
final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());
try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
recordSetWriter.beginRecordSet();
if (record != null) {
recordSetWriter.write(record);
}
while ((record = recordReader.nextRecord()) != null) {
recordSetWriter.write(record);
}
writeResult.set(recordSetWriter.finishRecordSet());
mimeTypeRef.set(recordSetWriter.getMimeType());
}
} catch (Exception e) {
exceptionHolder.set(e);
}
});
stopWatch.stop();
// if any error happened inside the session.write callback above, rethrow it here so we drop
// into one of the appropriate catch blocks below
if (exceptionHolder.get() != null) {
throw exceptionHolder.get();
}
FlowFile successFlowFile = postProcess(context, session, child, path);
final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
successFlowFile = session.putAllAttributes(successFlowFile, attributes);
final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
session.transfer(successFlowFile, REL_SUCCESS);
session.remove(originalFlowFile);
return null;
} catch (final FileNotFoundException | AccessControlException e) {
getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, e });
final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
session.transfer(failureFlowFile, REL_FAILURE);
} catch (final IOException | FlowFileAccessException e) {
getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] { filenameValue, originalFlowFile, e });
session.transfer(session.penalize(originalFlowFile), REL_RETRY);
context.yield();
} catch (final Throwable t) {
getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, t });
final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
session.transfer(failureFlowFile, REL_FAILURE);
}
// if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
if (child != null) {
session.remove(child);
}
return null;
});
}
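The session.write callback above follows the record-at-a-time lifecycle: beginRecordSet(), write() per record, finishRecordSet() to obtain a WriteResult, then expose the record count and MIME type as FlowFile attributes. A condensed sketch of that lifecycle follows; the helper name, the List<Record> input, and the literal "record.count" attribute key are assumptions for illustration.

import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;

class PerRecordWriteSketch {

    // Illustrative helper: write records one at a time and return the attributes to put on the FlowFile.
    Map<String, String> writeRecords(final List<Record> records, final RecordSetWriterFactory writerFactory,
                                     final RecordSchema writeSchema, final OutputStream out,
                                     final ComponentLog logger) throws Exception {
        final WriteResult writeResult;
        final String mimeType;
        // try-with-resources closes the writer even if a write fails, as in the processor above
        try (final RecordSetWriter writer = writerFactory.createWriter(logger, writeSchema, out)) {
            writer.beginRecordSet();
            for (final Record record : records) {
                writer.write(record);
            }
            writeResult = writer.finishRecordSet();
            mimeType = writer.getMimeType();
        }
        // WriteResult carries the record count plus any writer-provided attributes
        final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
        attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
        attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
        return attributes;
    }
}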
Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.
The class ListenTCPRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final SocketChannelRecordReader socketRecordReader = pollForSocketRecordReader();
if (socketRecordReader == null) {
return;
}
if (socketRecordReader.isClosed()) {
getLogger().warn("Unable to read records from {}, socket already closed", new Object[] { getRemoteAddress(socketRecordReader) });
// still need to call close so the overall count is decremented
IOUtils.closeQuietly(socketRecordReader);
return;
}
final int recordBatchSize = context.getProperty(RECORD_BATCH_SIZE).asInteger();
final String readerErrorHandling = context.getProperty(READER_ERROR_HANDLING_STRATEGY).getValue();
final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
// synchronize to ensure there are no stale values in the underlying SocketChannel
synchronized (socketRecordReader) {
FlowFile flowFile = session.create();
try {
// lazily creating the record reader here b/c we need a flow file, eventually shouldn't have to do this
RecordReader recordReader = socketRecordReader.getRecordReader();
if (recordReader == null) {
recordReader = socketRecordReader.createRecordReader(flowFile, getLogger());
}
Record record;
try {
record = recordReader.nextRecord();
} catch (final Exception e) {
boolean timeout = false;
// some of the underlying record libraries wrap the real exception in RuntimeException, so check each
// throwable (starting with the current one) to see if it's a SocketTimeoutException
Throwable cause = e;
while (cause != null) {
if (cause instanceof SocketTimeoutException) {
timeout = true;
break;
}
cause = cause.getCause();
}
if (timeout) {
getLogger().debug("Timeout reading records, will try again later", e);
socketReaders.offer(socketRecordReader);
session.remove(flowFile);
return;
} else {
throw e;
}
}
if (record == null) {
getLogger().debug("No records available from {}, closing connection", new Object[] { getRemoteAddress(socketRecordReader) });
IOUtils.closeQuietly(socketRecordReader);
session.remove(flowFile);
return;
}
String mimeType = null;
WriteResult writeResult = null;
final RecordSchema recordSchema = recordSetWriterFactory.getSchema(Collections.EMPTY_MAP, record.getSchema());
try (final OutputStream out = session.write(flowFile);
final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out)) {
// start the record set and write the first record from above
recordWriter.beginRecordSet();
writeResult = recordWriter.write(record);
while (record != null && writeResult.getRecordCount() < recordBatchSize) {
// on a read failure: if the strategy is to keep what was read so far, null out the record to break out of the loop, which will transfer what we have and close the connection; otherwise the exception is rethrown
try {
record = recordReader.nextRecord();
} catch (final SocketTimeoutException ste) {
getLogger().debug("Timeout reading records, will try again later", ste);
break;
} catch (final Exception e) {
if (ERROR_HANDLING_DISCARD.getValue().equals(readerErrorHandling)) {
throw e;
} else {
record = null;
}
}
if (record != null) {
writeResult = recordWriter.write(record);
}
}
writeResult = recordWriter.finishRecordSet();
recordWriter.flush();
mimeType = recordWriter.getMimeType();
}
// if we didn't write any records then we need to remove the flow file
if (writeResult.getRecordCount() <= 0) {
getLogger().debug("Removing flow file, no records were written");
session.remove(flowFile);
} else {
final String sender = getRemoteAddress(socketRecordReader);
final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
attributes.put("tcp.sender", sender);
attributes.put("tcp.port", String.valueOf(port));
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
flowFile = session.putAllAttributes(flowFile, attributes);
final String senderHost = sender.startsWith("/") && sender.length() > 1 ? sender.substring(1) : sender;
final String transitUri = new StringBuilder().append("tcp").append("://").append(senderHost).append(":").append(port).toString();
session.getProvenanceReporter().receive(flowFile, transitUri);
session.transfer(flowFile, REL_SUCCESS);
}
getLogger().debug("Re-queuing connection for further processing...");
socketReaders.offer(socketRecordReader);
} catch (Exception e) {
getLogger().error("Error processing records: " + e.getMessage(), e);
IOUtils.closeQuietly(socketRecordReader);
session.remove(flowFile);
return;
}
}
}
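ListenTCPRecord uses the streaming form of the API: session.write(flowFile) hands back an OutputStream directly, and the WriteResult returned by each write(record) call provides the running count used to cap the batch. A small sketch of that batching loop follows; the helper name and the already-open RecordReader parameter are assumptions, not part of the processor.

import java.io.OutputStream;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;

class BatchedRecordWriteSketch {

    // Illustrative helper: copy up to batchSize records from an open reader into the FlowFile's content.
    WriteResult writeBatch(final ProcessSession session, final FlowFile flowFile, final RecordReader reader,
                           final RecordSetWriterFactory writerFactory, final RecordSchema schema,
                           final int batchSize, final ComponentLog logger) throws Exception {
        final WriteResult finalResult;
        try (final OutputStream out = session.write(flowFile);
             final RecordSetWriter writer = writerFactory.createWriter(logger, schema, out)) {
            writer.beginRecordSet();
            Record record = reader.nextRecord();
            while (record != null) {
                // each write returns a running WriteResult, used here to enforce the batch size
                final WriteResult running = writer.write(record);
                if (running.getRecordCount() >= batchSize) {
                    break;
                }
                record = reader.nextRecord();
            }
            finalResult = writer.finishRecordSet();
            writer.flush();
        }
        return finalResult;
    }
}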
Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.
The class ListenUDPRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
for (int i = 0; i < maxBatchSize; i++) {
// this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
// if the error handling is ever changed to use the error queue then this flag needs to be changed as well
final StandardEvent event = getMessage(true, false, session);
// break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
if (event == null) {
break;
}
// attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
// entire datagram can be read as records, and if not transfer the whole thing to parse.failure
final RecordReader reader;
final List<Record> records = new ArrayList<>();
try (final InputStream in = new ByteArrayInputStream(event.getData())) {
reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());
Record record;
while ((record = reader.nextRecord()) != null) {
records.add(record);
}
} catch (final Exception e) {
handleParseFailure(event, session, e);
continue;
}
if (records.size() == 0) {
handleParseFailure(event, session, null);
continue;
}
// see if we already started a flow file and writer for the given sender
// if an exception happens creating the flow file or writer, put the event in the error queue to try it again later
FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());
if (flowFileRecordWriter == null) {
FlowFile flowFile = null;
OutputStream rawOut = null;
RecordSetWriter writer = null;
try {
flowFile = session.create();
rawOut = session.write(flowFile);
final Record firstRecord = records.get(0);
final RecordSchema recordSchema = firstRecord.getSchema();
final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);
writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
writer.beginRecordSet();
flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
} catch (final Exception ex) {
getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
try {
if (writer != null) {
writer.close();
}
} catch (final Exception e) {
getLogger().warn("Failed to close Record Writer", e);
}
if (rawOut != null) {
IOUtils.closeQuietly(rawOut);
}
if (flowFile != null) {
session.remove(flowFile);
}
context.yield();
break;
}
}
// attempt to write each record, if any record fails then remove the flow file and break out of the loop
final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
try {
for (final Record record : records) {
writer.write(record);
}
} catch (Exception e) {
getLogger().error("Failed to write records due to: " + e.getMessage(), e);
IOUtils.closeQuietly(writer);
session.remove(flowFileRecordWriter.getFlowFile());
flowFileRecordWriters.remove(event.getSender());
break;
}
}
for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
final String sender = entry.getKey();
final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
FlowFile flowFile = flowFileRecordWriter.getFlowFile();
try {
final WriteResult writeResult;
try {
writeResult = writer.finishRecordSet();
} finally {
writer.close();
}
if (writeResult.getRecordCount() == 0) {
session.remove(flowFile);
continue;
}
final Map<String, String> attributes = new HashMap<>();
attributes.putAll(getAttributes(sender));
attributes.putAll(writeResult.getAttributes());
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_SUCCESS);
final String transitUri = getTransitUri(sender);
session.getProvenanceReporter().receive(flowFile, transitUri);
} catch (final Exception e) {
getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
session.remove(flowFile);
}
}
}
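ListenUDPRecord, like PartitionRecord below, keeps a map of open writers keyed by some grouping value (the sender here, the RecordPath results there): the FlowFile and writer are created lazily on first use, and a second pass finishes each record set, closes the writer, and copies the resulting attributes onto the FlowFile before transfer. A stripped-down sketch of that keyed-writer pattern follows; the class, field, and method names are illustrative, and the transfer and error handling shown in the processors is omitted.

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;

class KeyedRecordWriters {

    private final Map<String, RecordSetWriter> writers = new HashMap<>();
    private final Map<String, FlowFile> flowFiles = new HashMap<>();

    // Route a record to the writer for its key, creating the FlowFile and writer on first use.
    void write(final String key, final Record record, final ProcessSession session,
               final RecordSetWriterFactory writerFactory, final ComponentLog logger) throws Exception {
        RecordSetWriter writer = writers.get(key);
        if (writer == null) {
            final FlowFile flowFile = session.create();
            final OutputStream out = session.write(flowFile);
            final RecordSchema schema = writerFactory.getSchema(Collections.emptyMap(), record.getSchema());
            writer = writerFactory.createWriter(logger, schema, out);
            writer.beginRecordSet();
            writers.put(key, writer);
            flowFiles.put(key, flowFile);
        }
        writer.write(record);
    }

    // Second pass: finish every record set, close its writer, and return the FlowFiles ready to transfer.
    List<FlowFile> finish(final ProcessSession session) throws IOException {
        final List<FlowFile> completed = new ArrayList<>();
        for (final Map.Entry<String, RecordSetWriter> entry : writers.entrySet()) {
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                writer.close();
            }
            final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            completed.add(session.putAllAttributes(flowFiles.get(entry.getKey()), attributes));
        }
        return completed;
    }
}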
Use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.
The class PartitionRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final Map<String, RecordPath> recordPaths;
try {
recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
} catch (final Exception e) {
getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
try (final InputStream in = session.read(flowFile)) {
final Map<String, String> originalAttributes = flowFile.getAttributes();
final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
Record record;
while ((record = reader.nextRecord()) != null) {
final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
// Evaluate all of the RecordPath's for this Record
for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
final String propName = entry.getKey();
final RecordPath recordPath = entry.getValue();
final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
recordMap.put(propName, fieldValues);
}
final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
// Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
RecordSetWriter writer = writerMap.get(recordValueMap);
if (writer == null) {
final FlowFile childFlowFile = session.create(flowFile);
recordValueMap.setFlowFile(childFlowFile);
final OutputStream out = session.write(childFlowFile);
writer = writerFactory.createWriter(getLogger(), writeSchema, out);
writer.beginRecordSet();
writerMap.put(recordValueMap, writer);
}
writer.write(record);
}
// For each RecordSetWriter, finish the record set and close the writer.
for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
final RecordValueMap valueMap = entry.getKey();
final RecordSetWriter writer = entry.getValue();
final WriteResult writeResult = writer.finishRecordSet();
writer.close();
final Map<String, String> attributes = new HashMap<>();
attributes.putAll(valueMap.getAttributes());
attributes.putAll(writeResult.getAttributes());
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
FlowFile childFlowFile = valueMap.getFlowFile();
childFlowFile = session.putAllAttributes(childFlowFile, attributes);
session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
}
} catch (final Exception e) {
for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
final RecordValueMap valueMap = entry.getKey();
final RecordSetWriter writer = entry.getValue();
try {
writer.close();
} catch (final IOException e1) {
getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
}
session.remove(valueMap.getFlowFile());
}
getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
session.transfer(flowFile, REL_FAILURE);
return;
}
// Transfer the child FlowFiles only now, at the very end, because we want to ensure that we are able to remove the child flowfiles in case of a failure.
for (final RecordValueMap valueMap : writerMap.keySet()) {
session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
}
session.transfer(flowFile, REL_ORIGINAL);
}