Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class ScanHBase, method finalizeFlowFile.
private void finalizeFlowFile(final ProcessSession session, final HBaseClientService hBaseClientService, FlowFile flowFile, final String tableName, Long rowsPulled, Exception e) {
    Relationship rel = REL_SUCCESS;
    flowFile = session.putAttribute(flowFile, HBASE_ROWS_COUNT_ATTR, rowsPulled.toString());
    final AtomicReference<IOException> ioe = new AtomicReference<>(null);
    flowFile = session.append(flowFile, (out) -> {
        try {
            out.write("]".getBytes());
        } catch (IOException ei) {
            ioe.set(ei);
        }
    });
    if (e != null || ioe.get() != null) {
        flowFile = session.putAttribute(flowFile, "scanhbase.error", (e != null ? e : ioe.get()).toString());
        rel = REL_FAILURE;
    } else {
        session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, "{ids}"));
    }
    session.transfer(flowFile, rel);
}
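The append-then-route pattern above can be isolated into a small helper. The sketch below is not part of the NiFi codebase; the class name AppendFooterExample, the relationships, and the "example.error" attribute key are hypothetical, and only the ProcessSession calls (append, putAttribute, transfer) mirror the method shown here.

import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;

// Hypothetical helper; only the ProcessSession calls mirror finalizeFlowFile above.
class AppendFooterExample {

    static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").build();
    static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").build();

    void closeJsonArray(final ProcessSession session, FlowFile flowFile) {
        // Capture any IOException thrown while appending, since the callback cannot return it directly.
        final AtomicReference<IOException> failure = new AtomicReference<>(null);
        flowFile = session.append(flowFile, out -> {
            try {
                out.write("]".getBytes());
            } catch (final IOException ioe) {
                failure.set(ioe);
            }
        });
        if (failure.get() != null) {
            // Record the error as an attribute (hypothetical key) and route to failure.
            flowFile = session.putAttribute(flowFile, "example.error", failure.get().toString());
            session.transfer(flowFile, REL_FAILURE);
        } else {
            session.transfer(flowFile, REL_SUCCESS);
        }
    }
}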
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class ConvertAvroToORC, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        long startTime = System.currentTimeMillis();
        final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
        final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
        final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
        final AtomicReference<Schema> hiveAvroSchema = new AtomicReference<>(null);
        final AtomicInteger totalRecordCount = new AtomicInteger(0);
        final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());
        flowFile = session.write(flowFile, (rawIn, rawOut) -> {
            try (final InputStream in = new BufferedInputStream(rawIn);
                 final OutputStream out = new BufferedOutputStream(rawOut);
                 final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<>())) {
                // Create ORC schema from Avro schema
                Schema avroSchema = reader.getSchema();
                TypeInfo orcSchema = NiFiOrcUtils.getOrcField(avroSchema);
                if (orcConfig == null) {
                    orcConfig = new Configuration();
                }
                OrcFlowFileWriter orcWriter = NiFiOrcUtils.createWriter(out, new Path(fileName), orcConfig, orcSchema, stripeSize, compressionType, bufferSize);
                try {
                    int recordCount = 0;
                    GenericRecord currRecord = null;
                    while (reader.hasNext()) {
                        currRecord = reader.next(currRecord);
                        List<Schema.Field> fields = currRecord.getSchema().getFields();
                        if (fields != null) {
                            Object[] row = new Object[fields.size()];
                            for (int i = 0; i < fields.size(); i++) {
                                Schema.Field field = fields.get(i);
                                Schema fieldSchema = field.schema();
                                Object o = currRecord.get(field.name());
                                try {
                                    row[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), o);
                                } catch (ArrayIndexOutOfBoundsException aioobe) {
                                    getLogger().error("Index out of bounds at record {} for column {}, type {}, and object {}", new Object[] { recordCount, i, fieldSchema.getType().getName(), o.toString() }, aioobe);
                                    throw new IOException(aioobe);
                                }
                            }
                            orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, row));
                            recordCount++;
                        }
                    }
                    hiveAvroSchema.set(avroSchema);
                    totalRecordCount.set(recordCount);
                } finally {
                    // finished writing this record, close the writer (which will flush to the flow file)
                    orcWriter.close();
                }
            }
        });
        final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet() ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue() : NiFiOrcUtils.normalizeHiveTableName(hiveAvroSchema.get().getFullName());
        String hiveDDL = NiFiOrcUtils.generateHiveDDL(hiveAvroSchema.get(), hiveTableName);
        // Add attributes and transfer to success
        flowFile = session.putAttribute(flowFile, RECORD_COUNT_ATTRIBUTE, Integer.toString(totalRecordCount.get()));
        flowFile = session.putAttribute(flowFile, HIVE_DDL_ATTRIBUTE, hiveDDL);
        StringBuilder newFilename = new StringBuilder();
        int extensionIndex = fileName.lastIndexOf(".");
        if (extensionIndex != -1) {
            newFilename.append(fileName.substring(0, extensionIndex));
        } else {
            newFilename.append(fileName);
        }
        newFilename.append(".orc");
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), ORC_MIME_TYPE);
        flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename.toString());
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().modifyContent(flowFile, "Converted " + totalRecordCount.get() + " records", System.currentTimeMillis() - startTime);
    } catch (final ProcessException pe) {
        getLogger().error("Failed to convert {} from Avro to ORC due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
    }
}
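The core of the method above is the session.write(FlowFile, StreamCallback) call, which streams the old content in while the replacement content is written out, after which the returned FlowFile reference is used for further attribute updates. A minimal sketch of that pattern follows, with a trivial uppercase transform standing in for the Avro-to-ORC conversion; the class name StreamRewriteExample and the text/plain MIME type are assumptions, not NiFi code.

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.ProcessSession;

// Hypothetical sketch of the read-transform-write pattern used by onTrigger above.
class StreamRewriteExample {

    FlowFile uppercaseContent(final ProcessSession session, FlowFile flowFile) {
        // session.write(FlowFile, StreamCallback) streams the old content in while the new content is written out.
        flowFile = session.write(flowFile, (rawIn, rawOut) -> {
            try (final InputStream in = new BufferedInputStream(rawIn);
                 final OutputStream out = new BufferedOutputStream(rawOut)) {
                final String original = IOUtils.toString(in, StandardCharsets.UTF_8);
                out.write(original.toUpperCase().getBytes(StandardCharsets.UTF_8));
            }
        });
        // Every mutation returns a new FlowFile reference, which must be carried forward.
        return session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "text/plain");
    }
}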
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class FlowFileIngestService, method send.
/**
 * Handle receipt of a FlowFileRequest and route it to the appropriate process relationship.
 *
 * @param request the flowfile request
 * @param responseObserver the mechanism by which to reply to the client
 */
@Override
public void send(final org.apache.nifi.processors.grpc.FlowFileRequest request, final StreamObserver<FlowFileReply> responseObserver) {
    final FlowFileReply.Builder replyBuilder = FlowFileReply.newBuilder();
    final String remoteHost = FlowFileIngestServiceInterceptor.REMOTE_HOST_KEY.get();
    final String remoteDN = FlowFileIngestServiceInterceptor.REMOTE_DN_KEY.get();
    // block until we have a session factory (occurs when processor is triggered)
    ProcessSessionFactory sessionFactory = null;
    while (sessionFactory == null) {
        sessionFactory = sessionFactoryReference.get();
        if (sessionFactory == null) {
            try {
                Thread.sleep(10);
            } catch (final InterruptedException e) {
            }
        }
    }
    final ProcessSession session = sessionFactory.createSession();
    // if there's no space available, reject the request.
    final long n = filesReceived.getAndIncrement() % FILES_BEFORE_CHECKING_DESTINATION_SPACE;
    if (n == 0 || !spaceAvailable.get()) {
        if (context.getAvailableRelationships().isEmpty()) {
            spaceAvailable.set(false);
            final String message = "Received request from " + remoteHost + " but no space available; Indicating Service Unavailable";
            if (logger.isDebugEnabled()) {
                logger.debug(message);
            }
            final FlowFileReply reply = replyBuilder.setResponseCode(FlowFileReply.ResponseCode.ERROR).setBody(message).build();
            responseObserver.onNext(reply);
            responseObserver.onCompleted();
            return;
        } else {
            spaceAvailable.set(true);
        }
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Received request from " + remoteHost);
    }
    final long startNanos = System.nanoTime();
    FlowFile flowFile = session.create();
    // push the attributes provided onto the created flowfile
    final Map<String, String> attributes = Maps.newHashMap();
    attributes.putAll(request.getAttributesMap());
    String sourceSystemFlowFileIdentifier = attributes.get(CoreAttributes.UUID.key());
    if (sourceSystemFlowFileIdentifier != null) {
        sourceSystemFlowFileIdentifier = "urn:nifi:" + sourceSystemFlowFileIdentifier;
        // If we received a UUID, we want to give the FlowFile a new UUID and register the sending system's
        // identifier as the SourceSystemFlowFileIdentifier field in the Provenance RECEIVE event
        attributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString());
    }
    flowFile = session.putAllAttributes(flowFile, attributes);
    final ByteString content = request.getContent();
    final InputStream contentStream = content.newInput();
    // write the provided content to the flowfile
    flowFile = session.write(flowFile, out -> {
        try (final BufferedOutputStream bos = new BufferedOutputStream(out, 65536)) {
            IOUtils.copy(contentStream, bos);
        }
    });
    final long transferNanos = System.nanoTime() - startNanos;
    final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
    session.getProvenanceReporter().receive(flowFile, SERVICE_NAME, sourceSystemFlowFileIdentifier, "Remote DN=" + remoteDN, transferMillis);
    flowFile = session.putAttribute(flowFile, ListenGRPC.REMOTE_HOST, remoteHost);
    flowFile = session.putAttribute(flowFile, ListenGRPC.REMOTE_USER_DN, remoteDN);
    // register success
    session.transfer(flowFile, ListenGRPC.REL_SUCCESS);
    session.commit();
    // reply to client
    final FlowFileReply reply = replyBuilder.setResponseCode(FlowFileReply.ResponseCode.SUCCESS).setBody("FlowFile successfully received.").build();
    responseObserver.onNext(reply);
    responseObserver.onCompleted();
}
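Stripped of the gRPC specifics, the ingest path above boils down to: create a session from the captured factory, create a FlowFile, write the payload, report a provenance RECEIVE, transfer, and commit explicitly. A condensed, hypothetical sketch of that sequence follows; the class name IngestExample, the transit URI, and the success relationship are invented for illustration.

import java.util.Map;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessSessionFactory;
import org.apache.nifi.processor.Relationship;

// Hypothetical condensed version of the ingest path shown in send() above.
class IngestExample {

    static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").build();

    void ingest(final ProcessSessionFactory sessionFactory, final byte[] payload, final Map<String, String> attributes) {
        final ProcessSession session = sessionFactory.createSession();
        try {
            FlowFile flowFile = session.create();
            flowFile = session.putAllAttributes(flowFile, attributes);
            // Write the payload into the FlowFile's content claim.
            flowFile = session.write(flowFile, out -> out.write(payload));
            // The transit URI is a placeholder; a real service would report where the data came from.
            session.getProvenanceReporter().receive(flowFile, "grpc://example-transit-uri");
            session.transfer(flowFile, REL_SUCCESS);
            // Sessions created from a factory are not committed by the framework, so commit explicitly.
            session.commit();
        } catch (final Throwable t) {
            session.rollback();
            throw t;
        }
    }
}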
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class StandardProcessSession, method migrate.
private void migrate(final StandardProcessSession newOwner, Collection<FlowFile> flowFiles) {
    // We don't call validateRecordState() here because we want to allow migration of FlowFiles that have already been marked as removed or transferred, etc.
    flowFiles = flowFiles.stream().map(this::getMostRecent).collect(Collectors.toList());
    for (final FlowFile flowFile : flowFiles) {
        if (openInputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently " + "has an open InputStream for the FlowFile, created by calling ProcessSession.read(FlowFile)");
        }
        if (openOutputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently " + "has an open OutputStream for the FlowFile, created by calling ProcessSession.write(FlowFile)");
        }
        if (readRecursionSet.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or InputStream created by ProcessSession.read(FlowFile) has not been closed");
        }
        if (writeRecursionSet.contains(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or OutputStream created by ProcessSession.write(FlowFile) has not been closed");
        }
        final StandardRepositoryRecord record = records.get(flowFile);
        if (record == null) {
            throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
        }
    }
    // If we have a FORK event for one of the given FlowFiles, then all children must also be migrated. Otherwise, we
    // could have a case where we have FlowFile A transferred and eventually exiting the flow and later the 'newOwner'
    // ProcessSession is committed, claiming to have created FlowFiles from the parent, which is no longer even in
    // the flow. This would be very confusing when looking at the provenance for the FlowFile, so it is best to avoid this.
    final Set<String> flowFileIds = flowFiles.stream().map(ff -> ff.getAttribute(CoreAttributes.UUID.key())).collect(Collectors.toSet());
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        if (flowFiles.contains(eventFlowFile)) {
            final ProvenanceEventBuilder eventBuilder = entry.getValue();
            for (final String childId : eventBuilder.getChildFlowFileIds()) {
                if (!flowFileIds.contains(childId)) {
                    throw new IllegalStateException("Cannot migrate " + eventFlowFile + " to a new session because it was forked to create " + eventBuilder.getChildFlowFileIds().size() + " children and not all children are being migrated. If any FlowFile is forked, all of its children must also be migrated at the same time as the forked FlowFile");
                }
            }
        }
    }
    // If any of a FORK event's children are being migrated, create a copy of the fork
    // event builder for the new owner of the FlowFile and remove the child from our fork event builder.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        final ProvenanceEventBuilder eventBuilder = entry.getValue();
        final Set<String> childrenIds = new HashSet<>(eventBuilder.getChildFlowFileIds());
        ProvenanceEventBuilder copy = null;
        for (final FlowFile flowFile : flowFiles) {
            final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
            if (childrenIds.contains(flowFileId)) {
                eventBuilder.removeChildFlowFile(flowFile);
                if (copy == null) {
                    copy = eventBuilder.copy();
                    copy.getChildFlowFileIds().clear();
                }
                copy.addChildFlowFile(flowFileId);
            }
        }
        if (copy != null) {
            newOwner.forkEventBuilders.put(eventFlowFile, copy);
        }
    }
    newOwner.processingStartTime = Math.min(newOwner.processingStartTime, processingStartTime);
    for (final FlowFile flowFile : flowFiles) {
        final FlowFileRecord flowFileRecord = (FlowFileRecord) flowFile;
        final StandardRepositoryRecord repoRecord = this.records.remove(flowFile);
        newOwner.records.put(flowFileRecord, repoRecord);
        // Adjust the counts for Connections for each FlowFile that was pulled from a Connection.
        // We do not have to worry about accounting for 'input counts' on connections because those
        // are incremented only during a checkpoint, and anything that's been checkpointed has
        // also been committed above.
        final FlowFileQueue inputQueue = repoRecord.getOriginalQueue();
        if (inputQueue != null) {
            final String connectionId = inputQueue.getIdentifier();
            incrementConnectionOutputCounts(connectionId, -1, -repoRecord.getOriginal().getSize());
            newOwner.incrementConnectionOutputCounts(connectionId, 1, repoRecord.getOriginal().getSize());
            unacknowledgedFlowFiles.get(inputQueue).remove(flowFile);
            newOwner.unacknowledgedFlowFiles.computeIfAbsent(inputQueue, queue -> new HashSet<>()).add(flowFileRecord);
            flowFilesIn--;
            contentSizeIn -= flowFile.getSize();
            newOwner.flowFilesIn++;
            newOwner.contentSizeIn += flowFile.getSize();
        }
        final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
        if (removedFlowFiles.remove(flowFileId)) {
            newOwner.removedFlowFiles.add(flowFileId);
            newOwner.removedCount++;
            newOwner.removedBytes += flowFile.getSize();
            removedCount--;
            removedBytes -= flowFile.getSize();
        }
        if (createdFlowFiles.remove(flowFileId)) {
            newOwner.createdFlowFiles.add(flowFileId);
        }
        if (repoRecord.getTransferRelationship() != null) {
            flowFilesOut--;
            contentSizeOut -= flowFile.getSize();
            newOwner.flowFilesOut++;
            newOwner.contentSizeOut += flowFile.getSize();
        }
        final List<ProvenanceEventRecord> events = generatedProvenanceEvents.remove(flowFile);
        if (events != null) {
            newOwner.generatedProvenanceEvents.put(flowFile, events);
        }
        final ContentClaim currentClaim = repoRecord.getCurrentClaim();
        if (currentClaim != null) {
            final ByteCountingOutputStream appendableStream = appendableStreams.remove(currentClaim);
            if (appendableStream != null) {
                newOwner.appendableStreams.put(currentClaim, appendableStream);
            }
        }
        final Path toDelete = deleteOnCommit.remove(flowFile);
        if (toDelete != null) {
            newOwner.deleteOnCommit.put(flowFile, toDelete);
        }
    }
    provenanceReporter.migrate(newOwner.provenanceReporter, flowFileIds);
}
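From a processor's point of view, migrate() is what lets a binning-style processor move FlowFiles from the short-lived session of one trigger into a longer-lived session that is committed later. A hypothetical caller-side sketch of that pattern follows; MigrateExample and moveToBin are invented names, and only ProcessSession.migrate and commit are the real API calls.

import java.util.Collection;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

// Hypothetical caller-side sketch of handing FlowFiles to a longer-lived "bin" session.
class MigrateExample {

    void moveToBin(final ProcessSession triggeringSession, final ProcessSession binSession, final Collection<FlowFile> flowFiles) {
        // Ownership of the FlowFiles, their repository records, queue accounting, and fork events
        // (as handled by the migrate() implementation above) passes to binSession.
        triggeringSession.migrate(binSession, flowFiles);
        // The triggering session can now be committed without touching the migrated FlowFiles;
        // binSession stays open and is committed later, e.g. when the bin is full.
        triggeringSession.commit();
    }
}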
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class WeakHashMapProcessSessionFactory, method createSession.
@Override
public synchronized ProcessSession createSession() {
    if (terminated) {
        throw new TerminatedTaskException();
    }
    final ProcessSession session = delegate.createSession();
    sessionMap.put(session, Boolean.TRUE);
    return session;
}
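Factories like this are what processors implementing onTrigger(ProcessContext, ProcessSessionFactory) hand to background threads, which then create sessions on demand, as in the FlowFileIngestService example earlier. A hypothetical sketch of that holder pattern follows; SessionFactoryHolder is an invented name, and only ProcessSessionFactory.createSession is the real API call.

import java.util.concurrent.atomic.AtomicReference;

import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessSessionFactory;

// Hypothetical holder showing how a processor publishes its session factory to background threads.
class SessionFactoryHolder {

    private final AtomicReference<ProcessSessionFactory> factoryRef = new AtomicReference<>(null);

    // Called when the framework triggers the processor with a session factory.
    void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) {
        factoryRef.compareAndSet(null, sessionFactory);
    }

    // Called from a background thread (e.g. an RPC handler) when data arrives.
    ProcessSession newSession() {
        final ProcessSessionFactory factory = factoryRef.get();
        if (factory == null) {
            throw new IllegalStateException("Processor has not been triggered yet; no session factory is available");
        }
        return factory.createSession();
    }
}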