Use of org.apache.nifi.processor.ProcessSession in the Apache NiFi project.
Class ParseEvtxTest, method testProcess1RecordGranularity.
/**
 * Exercises {@code processRecordGranularity} with a file header whose first chunk read throws a
 * {@link MalformedChunkException} and whose next two chunks yield one and two records respectively.
 * Expects the bad chunk to be passed to the malformed chunk handler and each of the three records'
 * root nodes to be handled by its own root node handler, which is then closed.
 */
@Test
public void testProcess1RecordGranularity() throws IOException, MalformedChunkException, XMLStreamException {
    final String basename = "basename";
    final int chunkNum = 5;
    final int offset = 10001;
    final byte[] badChunk = { 8 };
    final int recordCount = 3;

    // One output stream, root node handler, record, root node and created/written FlowFile pair per record.
    // The first output stream is the shared fixture; the other two are fresh mocks.
    final OutputStream[] outputs = { out, mock(OutputStream.class), mock(OutputStream.class) };
    final RootNodeHandler[] handlers = new RootNodeHandler[recordCount];
    final Record[] records = new Record[recordCount];
    final RootNode[] rootNodes = new RootNode[recordCount];
    final FlowFile[] createdFlowFiles = new FlowFile[recordCount];
    final FlowFile[] writtenFlowFiles = new FlowFile[recordCount];
    for (int i = 0; i < recordCount; i++) {
        handlers[i] = mock(RootNodeHandler.class);
        records[i] = mock(Record.class);
        rootNodes[i] = mock(RootNode.class);
        createdFlowFiles[i] = mock(FlowFile.class);
        writtenFlowFiles[i] = mock(FlowFile.class);
    }

    final ChunkHeader firstChunk = mock(ChunkHeader.class);
    final ChunkHeader secondChunk = mock(ChunkHeader.class);
    final ProcessSession session = mock(ProcessSession.class);
    final FlowFile flowFile = mock(FlowFile.class);
    final MalformedChunkException malformedChunkException =
            new MalformedChunkException("Test", null, offset, chunkNum, badChunk);

    // Each child FlowFile created from the input is written through the output stream of the same index.
    when(session.create(flowFile))
            .thenReturn(createdFlowFiles[0])
            .thenReturn(createdFlowFiles[1])
            .thenReturn(createdFlowFiles[2])
            .thenReturn(null);
    for (int i = 0; i < recordCount; i++) {
        final OutputStream target = outputs[i];
        final FlowFile written = writtenFlowFiles[i];
        when(rootNodeHandlerFactory.create(target)).thenReturn(handlers[i]);
        when(session.write(eq(createdFlowFiles[i]), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
            // Run the callback supplied by the code under test against the prepared stream.
            ((OutputStreamCallback) invocation.getArguments()[1]).process(target);
            return written;
        });
        when(records[i].getRootNode()).thenReturn(rootNodes[i]);
    }

    // File layout: a malformed chunk, then a chunk with one record, then a chunk with two records.
    when(fileHeader.hasNext()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false);
    when(fileHeader.next())
            .thenThrow(malformedChunkException)
            .thenReturn(firstChunk)
            .thenReturn(secondChunk)
            .thenReturn(null);
    when(firstChunk.hasNext()).thenReturn(true).thenReturn(false);
    when(firstChunk.next()).thenReturn(records[0]).thenReturn(null);
    when(secondChunk.hasNext()).thenReturn(true).thenReturn(true).thenReturn(false);
    when(secondChunk.next()).thenReturn(records[1]).thenReturn(records[2]).thenReturn(null);

    parseEvtx.processRecordGranularity(session, componentLog, flowFile, basename, in);

    verify(malformedChunkHandler).handle(flowFile, session,
            parseEvtx.getName(basename, chunkNum, null, ParseEvtx.EVTX_EXTENSION), badChunk);
    for (int i = 0; i < recordCount; i++) {
        verify(handlers[i]).handle(rootNodes[i]);
        verify(handlers[i]).close();
    }
}
Use of org.apache.nifi.processor.ProcessSession in the Apache NiFi project.
Class ResultProcessorTest, method testProcessResultFileFalure.
/**
 * Verifies that processing a result with an exception sets the filename and MIME type attributes,
 * transfers the FlowFile to the failure relationship, and logs the exception as an error.
 */
@Test
public void testProcessResultFileFalure() {
    // Arrange
    final String expectedFilename = "name";
    final Exception failure = new Exception();
    final ProcessSession session = mock(ProcessSession.class);
    final ComponentLog logger = mock(ComponentLog.class);
    final FlowFile input = mock(FlowFile.class);
    when(session.putAttribute(eq(input), anyString(), anyString())).thenReturn(input);

    // Act
    resultProcessor.process(session, logger, input, failure, expectedFilename);

    // Assert
    verify(session).putAttribute(input, CoreAttributes.FILENAME.key(), expectedFilename);
    verify(session).putAttribute(input, CoreAttributes.MIME_TYPE.key(), MediaType.APPLICATION_XML_UTF_8.toString());
    verify(session).transfer(input, failureRelationship);
    verify(logger).error(eq(ResultProcessor.UNABLE_TO_PROCESS_DUE_TO), any(Object[].class), eq(failure));
}
Use of org.apache.nifi.processor.ProcessSession in the Apache NiFi project.
Class AbstractListProcessor, method onTrigger.
/**
 * Runs one listing cycle.
 * <ol>
 *   <li>If no listing/processed timestamp is held in memory, or this node was just elected primary,
 *       restores the timestamps and processed identifiers from the state manager. The cycle is skipped
 *       (yield) when the stored listing timestamp equals the in-memory one or when state cannot be read.</li>
 *   <li>Calls {@code performListing} and groups the returned entities by timestamp, keeping only those
 *       at or after both the minimum listing timestamp and the last processed timestamp.</li>
 *   <li>Determines the target system's timestamp precision (configured or auto-detected from the listed
 *       timestamps) and, based on the matching listing lag, either yields or holds back the newest
 *       timestamp group for a later cycle.</li>
 *   <li>Creates one FlowFile per remaining entity (skipping identifiers already processed for the last
 *       processed timestamp), transfers it to {@code REL_SUCCESS}, commits the session, and persists the
 *       updated timestamps and identifiers.</li>
 * </ol>
 *
 * @param context provides the state manager and property values, and is used to yield
 * @param session used to create, attribute, transfer and commit the FlowFiles
 * @throws ProcessException declared by the overridden method; not thrown directly by this body
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    Long minTimestampToListMillis = lastListedLatestEntryTimestampMillis;

    if (this.lastListedLatestEntryTimestampMillis == null || this.lastProcessedLatestEntryTimestampMillis == null || justElectedPrimaryNode) {
        try {
            // Attempt to retrieve state from the state manager if a last listing was not yet established or
            // if just elected the primary node
            final StateMap stateMap = context.getStateManager().getState(getStateScope(context));
            latestIdentifiersProcessed.clear();
            for (Map.Entry<String, String> state : stateMap.toMap().entrySet()) {
                final String k = state.getKey();
                final String v = state.getValue();
                if (v == null || v.isEmpty()) {
                    continue;
                }

                if (LATEST_LISTED_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    minTimestampToListMillis = Long.parseLong(v);
                    // If our determined timestamp is the same as that of our last listing, skip this execution as there are no updates
                    if (minTimestampToListMillis.equals(this.lastListedLatestEntryTimestampMillis)) {
                        context.yield();
                        return;
                    } else {
                        this.lastListedLatestEntryTimestampMillis = minTimestampToListMillis;
                    }
                } else if (LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    this.lastProcessedLatestEntryTimestampMillis = Long.parseLong(v);
                } else if (k.startsWith(IDENTIFIER_PREFIX)) {
                    latestIdentifiersProcessed.add(v);
                }
            }
            justElectedPrimaryNode = false;
        } catch (final IOException ioe) {
            // Pass the exception to the logger so the cause of the failure is not lost.
            getLogger().error("Failed to retrieve timestamp of last listing from the State Manager. Will not perform listing until this is accomplished.", ioe);
            context.yield();
            return;
        }
    }

    final List<T> entityList;
    // Capture the start time of this run before listing; it is used below for the listing-lag checks.
    final long currentRunTimeNanos = System.nanoTime();
    final long currentRunTimeMillis = System.currentTimeMillis();
    try {
        entityList = performListing(context, minTimestampToListMillis);
    } catch (final IOException e) {
        getLogger().error("Failed to perform listing on remote host due to {}", e);
        context.yield();
        return;
    }

    if (entityList == null || entityList.isEmpty()) {
        context.yield();
        return;
    }

    Long latestListedEntryTimestampThisCycleMillis = null;
    final TreeMap<Long, List<T>> orderedEntries = new TreeMap<>();

    // Build a sorted map to determine the latest possible entries
    boolean targetSystemHasMilliseconds = false;
    boolean targetSystemHasSeconds = false;
    for (final T entity : entityList) {
        final long entityTimestampMillis = entity.getTimestamp();
        if (!targetSystemHasMilliseconds) {
            targetSystemHasMilliseconds = entityTimestampMillis % 1000 > 0;
        }
        if (!targetSystemHasSeconds) {
            targetSystemHasSeconds = entityTimestampMillis % 60_000 > 0;
        }

        // New entries are all those that occur at or after the associated timestamp.
        // The last processed timestamp may still be null here (it is one of the conditions checked at the
        // top of this method), so it is compared only when present instead of being unboxed blindly.
        final Long lastProcessedMillis = lastProcessedLatestEntryTimestampMillis;
        final boolean newEntry = minTimestampToListMillis == null
                || (entityTimestampMillis >= minTimestampToListMillis
                        && (lastProcessedMillis == null || entityTimestampMillis >= lastProcessedMillis));

        if (newEntry) {
            orderedEntries.computeIfAbsent(entityTimestampMillis, timestamp -> new ArrayList<>()).add(entity);
        }
    }

    int flowfilesCreated = 0;

    if (orderedEntries.size() > 0) {
        latestListedEntryTimestampThisCycleMillis = orderedEntries.lastKey();

        // Determine target system time precision.
        String specifiedPrecision = context.getProperty(TARGET_SYSTEM_TIMESTAMP_PRECISION).getValue();
        if (StringUtils.isBlank(specifiedPrecision)) {
            // If TARGET_SYSTEM_TIMESTAMP_PRECISION is not supported by the Processor, then specifiedPrecision can be null, instead of its default value.
            specifiedPrecision = getDefaultTimePrecision();
        }
        // Auto-detect picks the finest unit observed in the listed timestamps; otherwise use the configured unit,
        // falling back to minutes for any value other than millis or seconds.
        final TimeUnit targetSystemTimePrecision = PRECISION_AUTO_DETECT.getValue().equals(specifiedPrecision)
                ? (targetSystemHasMilliseconds ? TimeUnit.MILLISECONDS
                        : targetSystemHasSeconds ? TimeUnit.SECONDS : TimeUnit.MINUTES)
                : (PRECISION_MILLIS.getValue().equals(specifiedPrecision) ? TimeUnit.MILLISECONDS
                        : PRECISION_SECONDS.getValue().equals(specifiedPrecision) ? TimeUnit.SECONDS : TimeUnit.MINUTES);
        final Long listingLagMillis = LISTING_LAG_MILLIS.get(targetSystemTimePrecision);

        // If the latest listed timestamp equals the one from the previous listing, another iteration has
        // occurred without new files and special handling is needed to avoid starvation
        if (latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis)) {
            /* We need to wait for another cycle when either:
             *   - If we have not eclipsed the minimal listing lag needed due to being triggered too soon after the last run
             *   - The latest listed entity timestamp is equal to the last processed time, meaning we handled those items originally passed over. No need to process it again.
             */
            final long listingLagNanos = TimeUnit.MILLISECONDS.toNanos(listingLagMillis);
            if (currentRunTimeNanos - lastRunTimeNanos < listingLagNanos
                    || (latestListedEntryTimestampThisCycleMillis.equals(lastProcessedLatestEntryTimestampMillis)
                            && orderedEntries.get(latestListedEntryTimestampThisCycleMillis).stream()
                                    .allMatch(entity -> latestIdentifiersProcessed.contains(entity.getIdentifier())))) {
                context.yield();
                return;
            }
        } else {
            // Convert minimum reliable timestamp into target system time unit, in order to truncate unreliable digits.
            final long minimumReliableTimestampInFilesystemTimeUnit = targetSystemTimePrecision.convert(currentRunTimeMillis - listingLagMillis, TimeUnit.MILLISECONDS);
            final long minimumReliableTimestampMillis = targetSystemTimePrecision.toMillis(minimumReliableTimestampInFilesystemTimeUnit);
            // The minimum timestamp should be reliable to determine that no further entries will be added with the same timestamp based on the target system time precision.
            if (minimumReliableTimestampMillis < latestListedEntryTimestampThisCycleMillis) {
                // Otherwise, newest entries are held back one cycle to avoid issues in writes occurring exactly when the listing is being performed to avoid missing data
                orderedEntries.remove(latestListedEntryTimestampThisCycleMillis);
            }
        }

        for (Map.Entry<Long, List<T>> timestampEntities : orderedEntries.entrySet()) {
            List<T> entities = timestampEntities.getValue();
            if (timestampEntities.getKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // Filter out previously processed entities.
                entities = entities.stream().filter(entity -> !latestIdentifiersProcessed.contains(entity.getIdentifier())).collect(Collectors.toList());
            }

            for (T entity : entities) {
                // Create the FlowFile for this path.
                final Map<String, String> attributes = createAttributes(entity, context);
                FlowFile flowFile = session.create();
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.transfer(flowFile, REL_SUCCESS);
                flowfilesCreated++;
            }
        }
    }

    // As long as we have a listing timestamp, there is meaningful state to capture regardless of any outputs generated
    if (latestListedEntryTimestampThisCycleMillis != null) {
        boolean processedNewFiles = flowfilesCreated > 0;
        if (processedNewFiles) {
            // orderedEntries.lastKey() is used here rather than latestListedEntryTimestampThisCycleMillis
            // because that entry might have been removed above if it was not old enough.
            if (!orderedEntries.lastKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // If the latest timestamp at this cycle becomes different than the previous one, we need to clear identifiers.
                // If it didn't change, we need to add identifiers.
                latestIdentifiersProcessed.clear();
            }
            // Capture latestIdentifierProcessed.
            latestIdentifiersProcessed.addAll(orderedEntries.lastEntry().getValue().stream().map(T::getIdentifier).collect(Collectors.toList()));
            lastProcessedLatestEntryTimestampMillis = orderedEntries.lastKey();
            getLogger().info("Successfully created listing with {} new objects", new Object[] { flowfilesCreated });
            session.commit();
        }

        lastRunTimeNanos = currentRunTimeNanos;

        if (!latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis) || processedNewFiles) {
            // Update the in-memory listing timestamp first, then persist. A failure to persist is only
            // logged, so this method still completes normally with the in-memory state updated.
            try {
                lastListedLatestEntryTimestampMillis = latestListedEntryTimestampThisCycleMillis;
                persist(latestListedEntryTimestampThisCycleMillis, lastProcessedLatestEntryTimestampMillis, latestIdentifiersProcessed, context.getStateManager(), getStateScope(context));
            } catch (final IOException ioe) {
                getLogger().warn("Unable to save state due to {}. If NiFi is restarted before state is saved, or " + "if another node begins executing this Processor, data duplication may occur.", ioe);
            }
        }
    } else {
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();

        // lastListingTime = 0 so that we don't continually poll the distributed cache / local file system
        if (lastListedLatestEntryTimestampMillis == null) {
            lastListedLatestEntryTimestampMillis = 0L;
        }
    }
}
Use of org.apache.nifi.processor.ProcessSession in the Apache NiFi project.
Class PartialFunctions, method onTrigger.
/**
 * Creates a session from the factory, runs the supplied trigger logic against it and commits.
 * If anything is thrown, the failure is logged, handed to the rollback strategy together with the
 * session, and then rethrown unchanged.
 *
 * @param context         the process context (not used by this method)
 * @param sessionFactory  factory that supplies the session for this invocation
 * @param logger          receives the error message when execution fails
 * @param onTrigger       the logic to execute with the created session
 * @param rollbackSession invoked with the session and the failure when execution or commit throws
 * @throws ProcessException as declared; whatever was thrown by the trigger logic or commit is rethrown
 */
public static void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory, ComponentLog logger, OnTrigger onTrigger, RollbackSession rollbackSession) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    try {
        onTrigger.execute(session);
        session.commit();
    } catch (final Throwable failure) {
        final Object[] logArguments = { onTrigger, failure };
        logger.error("{} failed to process due to {}; rolling back session", logArguments);
        rollbackSession.rollback(session, failure);
        throw failure;
    }
}
Use of org.apache.nifi.processor.ProcessSession in the Apache NiFi project.
Class AbstractPort, method onTrigger.
/**
 * Creates a session, delegates to the session-based {@code onTrigger} overload and commits.
 * On any failure the session is rolled back; a {@link ProcessException} is rethrown as-is and any
 * other throwable is wrapped in a {@link RuntimeException}.
 *
 * @param context        the process context passed through to the session-based overload
 * @param sessionFactory factory that supplies the session for this invocation
 * @throws ProcessException if the delegated call or the commit throws one
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    try {
        onTrigger(context, session);
        session.commit();
    } catch (final Throwable failure) {
        // Roll back first in every failure case, then decide how to surface the failure.
        session.rollback();
        if (failure instanceof ProcessException) {
            throw (ProcessException) failure;
        }
        throw new RuntimeException(failure);
    }
}
Aggregations