Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class AbstractPutHDFSRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exists, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // if an error occurred within the session.read callback, rethrow it here so that it lands
            // in one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }
        return null;
    });
}
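The core pattern above: obtain a RecordReader from the configured RecordReaderFactory inside the session.read callback, expose the incoming records as a RecordSet, and hand the whole set to the writer in one call. A minimal sketch of that read path on its own, assuming a configured factory and using a hypothetical processRecord handler in place of the HDFS writer:

try (final InputStream in = session.read(flowFile);
     final RecordReader reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger())) {
    final RecordSet recordSet = reader.createRecordSet();
    Record record;
    while ((record = recordSet.next()) != null) {
        processRecord(record); // hypothetical per-record handler
    }
}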
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class MockRecordParser, method createRecordReader.
@Override
public RecordReader createRecordReader(Map<String, String> variables, InputStream in, ComponentLog logger) throws IOException, SchemaNotFoundException {
    final Iterator<Object[]> itr = records.iterator();
    return new RecordReader() {

        private int recordCount = 0;

        @Override
        public void close() throws IOException {
        }

        @Override
        public Record nextRecord(final boolean coerceTypes, final boolean dropUnknown) throws IOException, MalformedRecordException {
            // only fail when a failure point is configured (failAfterN >= 0) and it has been reached
            if (failAfterN >= 0 && recordCount >= failAfterN) {
                throw new MalformedRecordException("Intentional Unit Test Exception because " + recordCount + " records have been read");
            }
            recordCount++;
            if (!itr.hasNext()) {
                return null;
            }
            final Object[] values = itr.next();
            final Map<String, Object> valueMap = new HashMap<>();
            int i = 0;
            for (final RecordField field : fields) {
                final String fieldName = field.getFieldName();
                valueMap.put(fieldName, values[i++]);
            }
            return new MapRecord(new SimpleRecordSchema(fields), valueMap);
        }

        @Override
        public RecordSchema getSchema() {
            return new SimpleRecordSchema(fields);
        }
    };
}
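In a unit test this mock is registered as a controller service and seeded with a schema and rows before the processor under test runs. A minimal usage sketch, assuming the addSchemaField/addRecord helpers this mock exposes, a TestRunner named runner, and a hypothetical record-reader property name on the processor:

final MockRecordParser parser = new MockRecordParser();
parser.addSchemaField("name", RecordFieldType.STRING);
parser.addSchemaField("age", RecordFieldType.INT);
parser.addRecord("John Doe", 48);

runner.addControllerService("reader", parser);
runner.enableControllerService(parser);
runner.setProperty("record-reader", "reader"); // property name is processor-specific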
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class PutElasticsearchHttpRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions(flowFile).getValue();
    OkHttpClient okHttpClient = getClient();
    final ComponentLog logger = getLogger();
    final String baseUrl = trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
    HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder().addPathSegment("_bulk");
    // Find the user-added properties and set them as query parameters on the URL
    for (Map.Entry<PropertyDescriptor, String> property : context.getProperties().entrySet()) {
        PropertyDescriptor pd = property.getKey();
        if (pd.isDynamic()) {
            if (property.getValue() != null) {
                urlBuilder = urlBuilder.addQueryParameter(pd.getName(), context.getProperty(pd).evaluateAttributeExpressions().getValue());
            }
        }
    }
    final URL url = urlBuilder.build().url();
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(index)) {
        logger.error("No value for index for {}, transferring to failure", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(indexOp)) {
        logger.error("No Index operation specified for {}, transferring to failure.", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    switch (indexOp.toLowerCase()) {
        case "index":
        case "update":
        case "upsert":
        case "delete":
            break;
        default:
            logger.error("Index operation {} not supported for {}, transferring to failure.", new Object[] { indexOp, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
    }
    this.nullSuppression = context.getProperty(SUPPRESS_NULLS).getValue();
    final String id_path = context.getProperty(ID_RECORD_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final RecordPath recordPath = StringUtils.isEmpty(id_path) ? null : recordPathCache.getCompiled(id_path);
    final StringBuilder sb = new StringBuilder();
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final String id;
            if (recordPath != null) {
                Optional<FieldValue> idPathValue = recordPath.evaluate(record).getSelectedFields().findFirst();
                if (!idPathValue.isPresent() || idPathValue.get().getValue() == null) {
                    throw new IdentifierNotFoundException("Identifier Record Path specified but no value was found, transferring {} to failure.");
                }
                id = idPathValue.get().getValue().toString();
            } else {
                id = null;
            }
            // a missing ID indicates one is to be auto-generated by Elasticsearch
            if (id == null && !indexOp.equalsIgnoreCase("index")) {
                throw new IdentifierNotFoundException("Index operation {} requires a valid identifier value from a flow file attribute, transferring to failure.");
            }
            final StringBuilder json = new StringBuilder();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            JsonGenerator generator = factory.createJsonGenerator(out);
            writeRecord(record, record.getSchema(), generator);
            generator.flush();
            generator.close();
            json.append(out.toString());
            if (indexOp.equalsIgnoreCase("index")) {
                sb.append("{\"index\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\"");
                if (!StringUtils.isEmpty(id)) {
                    sb.append(", \"_id\": \"");
                    sb.append(id);
                    sb.append("\"");
                }
                sb.append("}}\n");
                sb.append(json);
                sb.append("\n");
            } else if (indexOp.equalsIgnoreCase("upsert") || indexOp.equalsIgnoreCase("update")) {
                sb.append("{\"update\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                // close both the inner object and the action line so the bulk entry is valid JSON
                sb.append("\" }}\n");
                sb.append("{\"doc\": ");
                sb.append(json);
                sb.append(", \"doc_as_upsert\": ");
                sb.append(indexOp.equalsIgnoreCase("upsert"));
                sb.append(" }\n");
            } else if (indexOp.equalsIgnoreCase("delete")) {
                sb.append("{\"delete\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                sb.append("\" }}\n");
            }
        }
    } catch (IdentifierNotFoundException infe) {
        logger.error(infe.getMessage(), new Object[] { flowFile });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (final IOException | SchemaNotFoundException | MalformedRecordException e) {
        logger.error("Could not parse incoming data", e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), sb.toString());
    final Response getResponse;
    try {
        getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "PUT", requestBody);
    } catch (final Exception e) {
        logger.error("Routing to {} due to exception: {}", new Object[] { REL_FAILURE.getName(), e }, e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final int statusCode = getResponse.code();
    if (isSuccess(statusCode)) {
        ResponseBody responseBody = getResponse.body();
        try {
            final byte[] bodyBytes = responseBody.bytes();
            JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
            boolean errors = responseJson.get("errors").asBoolean(false);
            // ES has no rollback, so if errors occur, log them and route the whole flow file to failure
            if (errors) {
                ArrayNode itemNodeArray = (ArrayNode) responseJson.get("items");
                if (itemNodeArray.size() > 0) {
                    // All items are returned whether they succeeded or failed, so scan the item array
                    // and keep a single failure reason for logging
                    String errorReason = null;
                    for (int i = itemNodeArray.size() - 1; i >= 0; i--) {
                        JsonNode itemNode = itemNodeArray.get(i);
                        int status = itemNode.findPath("status").asInt();
                        if (!isSuccess(status)) {
                            if (errorReason == null) {
                                // Use "result" if it is present; this happens for status codes like 404 Not Found, which may not have an error/reason
                                String reason = itemNode.findPath("//result").asText();
                                if (StringUtils.isEmpty(reason)) {
                                    // If there was no result, we expect an error with a string description in the "reason" field
                                    reason = itemNode.findPath("//error/reason").asText();
                                }
                                errorReason = reason;
                                logger.error("Failed to process {} due to {}, transferring to failure", new Object[] { flowFile, errorReason });
                            }
                        }
                    }
                }
                session.transfer(flowFile, REL_FAILURE);
            } else {
                session.transfer(flowFile, REL_SUCCESS);
                session.getProvenanceReporter().send(flowFile, url.toString());
            }
        } catch (IOException ioe) {
            // Something went wrong when parsing the response, log the error and route to failure
            logger.error("Error parsing Bulk API response: {}", new Object[] { ioe.getMessage() }, ioe);
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
        }
    } else if (statusCode / 100 == 5) {
        // 5xx -> RETRY, but a server error might last a while, so yield
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...", new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } else {
        // 1xx, 3xx, 4xx, etc. -> NO RETRY
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to failure", new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_FAILURE);
    }
    getResponse.close();
}
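For reference, each record contributes an action line followed (except for delete) by a document line in the bulk body assembled above. An illustrative two-line entry for an index operation, with hypothetical values:

{"index": { "_index": "tweets", "_type": "status", "_id": "1"}}
{"message": "hello"}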
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class CommaSeparatedRecordReader, method createRecordReader.
@Override
public RecordReader createRecordReader(Map<String, String> variables, final InputStream in, final ComponentLog logger) throws IOException, SchemaNotFoundException {
    final BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    final List<RecordField> fields = new ArrayList<>();
    final String headerLine = reader.readLine();
    for (final String colName : headerLine.split(",")) {
        fields.add(new RecordField(colName.trim(), RecordFieldType.STRING.getDataType()));
    }
    return new RecordReader() {

        @Override
        public void close() throws IOException {
            reader.close();
        }

        @Override
        public Record nextRecord(final boolean coerceTypes, final boolean dropUnknown) throws IOException, MalformedRecordException {
            if (failAfterN > -1 && recordCount >= failAfterN) {
                throw new MalformedRecordException("Intentional Unit Test Exception because " + recordCount + " records have been read");
            }
            final String nextLine = reader.readLine();
            if (nextLine == null) {
                return null;
            }
            recordCount++;
            final String[] values = nextLine.split(",");
            final Map<String, Object> valueMap = new HashMap<>();
            int i = 0;
            for (final RecordField field : fields) {
                final String fieldName = field.getFieldName();
                valueMap.put(fieldName, values[i++].trim());
            }
            return new MapRecord(new SimpleRecordSchema(fields), valueMap);
        }

        @Override
        public RecordSchema getSchema() {
            return new SimpleRecordSchema(fields);
        }
    };
}
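Because the header line is parsed eagerly in createRecordReader, this factory can be exercised directly against an in-memory stream. A minimal sketch, assuming a ComponentLog is at hand and that the no-argument constructor leaves the failure threshold disabled:

final byte[] csv = "name,age\nJohn,48\n".getBytes(StandardCharsets.UTF_8);
final CommaSeparatedRecordReader readerFactory = new CommaSeparatedRecordReader();
try (final InputStream in = new ByteArrayInputStream(csv);
     final RecordReader reader = readerFactory.createRecordReader(Collections.emptyMap(), in, logger)) {
    Record record;
    while ((record = reader.nextRecord()) != null) {
        // each record maps the trimmed header names to the trimmed column values
    }
}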
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class ListenTCPRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final SocketChannelRecordReader socketRecordReader = pollForSocketRecordReader();
    if (socketRecordReader == null) {
        return;
    }
    if (socketRecordReader.isClosed()) {
        getLogger().warn("Unable to read records from {}, socket already closed", new Object[] { getRemoteAddress(socketRecordReader) });
        // still need to call close so the overall count is decremented
        IOUtils.closeQuietly(socketRecordReader);
        return;
    }
    final int recordBatchSize = context.getProperty(RECORD_BATCH_SIZE).asInteger();
    final String readerErrorHandling = context.getProperty(READER_ERROR_HANDLING_STRATEGY).getValue();
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    // synchronize to ensure there are no stale values in the underlying SocketChannel
    synchronized (socketRecordReader) {
        FlowFile flowFile = session.create();
        try {
            // lazily creating the record reader here b/c we need a flow file, eventually shouldn't have to do this
            RecordReader recordReader = socketRecordReader.getRecordReader();
            if (recordReader == null) {
                recordReader = socketRecordReader.createRecordReader(flowFile, getLogger());
            }
            Record record;
            try {
                record = recordReader.nextRecord();
            } catch (final Exception e) {
                boolean timeout = false;
                // some of the underlying record libraries wrap the real exception in RuntimeException, so check each
                // throwable (starting with the current one) to see if it's a SocketTimeoutException
                Throwable cause = e;
                while (cause != null) {
                    if (cause instanceof SocketTimeoutException) {
                        timeout = true;
                        break;
                    }
                    cause = cause.getCause();
                }
                if (timeout) {
                    getLogger().debug("Timeout reading records, will try again later", e);
                    socketReaders.offer(socketRecordReader);
                    session.remove(flowFile);
                    return;
                } else {
                    throw e;
                }
            }
            if (record == null) {
                getLogger().debug("No records available from {}, closing connection", new Object[] { getRemoteAddress(socketRecordReader) });
                IOUtils.closeQuietly(socketRecordReader);
                session.remove(flowFile);
                return;
            }
            String mimeType = null;
            WriteResult writeResult = null;
            final RecordSchema recordSchema = recordSetWriterFactory.getSchema(Collections.EMPTY_MAP, record.getSchema());
            try (final OutputStream out = session.write(flowFile);
                 final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out)) {
                // start the record set and write the first record from above
                recordWriter.beginRecordSet();
                writeResult = recordWriter.write(record);
                while (record != null && writeResult.getRecordCount() < recordBatchSize) {
                    // read failures follow the selected strategy: with the discard strategy the exception is
                    // rethrown, otherwise the record is nulled out to break out of the loop, which will
                    // transfer what we have and close the connection
                    try {
                        record = recordReader.nextRecord();
                    } catch (final SocketTimeoutException ste) {
                        getLogger().debug("Timeout reading records, will try again later", ste);
                        break;
                    } catch (final Exception e) {
                        if (ERROR_HANDLING_DISCARD.getValue().equals(readerErrorHandling)) {
                            throw e;
                        } else {
                            record = null;
                        }
                    }
                    if (record != null) {
                        writeResult = recordWriter.write(record);
                    }
                }
                writeResult = recordWriter.finishRecordSet();
                recordWriter.flush();
                mimeType = recordWriter.getMimeType();
            }
            // if we didn't write any records then we need to remove the flow file
            if (writeResult.getRecordCount() <= 0) {
                getLogger().debug("Removing flow file, no records were written");
                session.remove(flowFile);
            } else {
                final String sender = getRemoteAddress(socketRecordReader);
                final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
                attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
                attributes.put("tcp.sender", sender);
                attributes.put("tcp.port", String.valueOf(port));
                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                flowFile = session.putAllAttributes(flowFile, attributes);
                final String senderHost = sender.startsWith("/") && sender.length() > 1 ? sender.substring(1) : sender;
                final String transitUri = new StringBuilder().append("tcp").append("://").append(senderHost).append(":").append(port).toString();
                session.getProvenanceReporter().receive(flowFile, transitUri);
                session.transfer(flowFile, REL_SUCCESS);
            }
            getLogger().debug("Re-queuing connection for further processing...");
            socketReaders.offer(socketRecordReader);
        } catch (Exception e) {
            getLogger().error("Error processing records: " + e.getMessage(), e);
            IOUtils.closeQuietly(socketRecordReader);
            session.remove(flowFile);
            return;
        }
    }
}
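The cause-chain walk used above to detect socket timeouts is self-contained and could be factored into a small helper; a sketch of the same logic under a hypothetical name:

private static boolean isCausedBySocketTimeout(final Throwable t) {
    // some record libraries wrap the real exception in a RuntimeException,
    // so inspect every throwable in the cause chain
    Throwable cause = t;
    while (cause != null) {
        if (cause instanceof SocketTimeoutException) {
            return true;
        }
        cause = cause.getCause();
    }
    return false;
}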