Search in sources :

Example 91 with ProcessSession

use of org.apache.nifi.processor.ProcessSession in project nifi by apache.

the class ParseEvtxTest method testProcess1RecordGranularity.

/**
 * Exercises {@code processRecordGranularity} against a file header that first throws a
 * {@link MalformedChunkException} and then yields two chunks holding one and two records.
 * Expects the malformed chunk to be passed to the malformed-chunk handler and each of the
 * three root nodes to be handled, and its handler closed, exactly once.
 */
@Test
public void testProcess1RecordGranularity() throws IOException, MalformedChunkException, XMLStreamException {
    final String basename = "basename";
    final int chunkNum = 5;
    final int offset = 10001;
    final byte[] badChunk = { 8 };
    final MalformedChunkException malformedChunk = new MalformedChunkException("Test", null, offset, chunkNum, badChunk);

    // One output stream / root-node handler pair per expected record; the first stream is the shared fixture.
    final OutputStream secondOut = mock(OutputStream.class);
    final OutputStream thirdOut = mock(OutputStream.class);
    final RootNodeHandler firstHandler = mock(RootNodeHandler.class);
    final RootNodeHandler secondHandler = mock(RootNodeHandler.class);
    final RootNodeHandler thirdHandler = mock(RootNodeHandler.class);
    when(rootNodeHandlerFactory.create(out)).thenReturn(firstHandler);
    when(rootNodeHandlerFactory.create(secondOut)).thenReturn(secondHandler);
    when(rootNodeHandlerFactory.create(thirdOut)).thenReturn(thirdHandler);

    // Session hands out a fresh child FlowFile per record, then nothing.
    final ProcessSession session = mock(ProcessSession.class);
    final FlowFile flowFile = mock(FlowFile.class);
    final FlowFile firstCreated = mock(FlowFile.class);
    final FlowFile firstUpdated = mock(FlowFile.class);
    final FlowFile secondCreated = mock(FlowFile.class);
    final FlowFile secondUpdated = mock(FlowFile.class);
    final FlowFile thirdCreated = mock(FlowFile.class);
    final FlowFile thirdUpdated = mock(FlowFile.class);
    when(session.create(flowFile)).thenReturn(firstCreated).thenReturn(secondCreated).thenReturn(thirdCreated).thenReturn(null);

    // Writing to a child FlowFile drives the supplied callback with the matching output stream.
    when(session.write(eq(firstCreated), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        final OutputStreamCallback callback = (OutputStreamCallback) invocation.getArguments()[1];
        callback.process(out);
        return firstUpdated;
    });
    when(session.write(eq(secondCreated), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        final OutputStreamCallback callback = (OutputStreamCallback) invocation.getArguments()[1];
        callback.process(secondOut);
        return secondUpdated;
    });
    when(session.write(eq(thirdCreated), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        final OutputStreamCallback callback = (OutputStreamCallback) invocation.getArguments()[1];
        callback.process(thirdOut);
        return thirdUpdated;
    });

    // Records and the root nodes they expose.
    final RootNode firstRootNode = mock(RootNode.class);
    final RootNode secondRootNode = mock(RootNode.class);
    final RootNode thirdRootNode = mock(RootNode.class);
    final Record firstRecord = mock(Record.class);
    final Record secondRecord = mock(Record.class);
    final Record thirdRecord = mock(Record.class);
    when(firstRecord.getRootNode()).thenReturn(firstRootNode);
    when(secondRecord.getRootNode()).thenReturn(secondRootNode);
    when(thirdRecord.getRootNode()).thenReturn(thirdRootNode);

    // File header iteration: a malformed chunk first, then two readable chunks.
    final ChunkHeader firstChunk = mock(ChunkHeader.class);
    final ChunkHeader secondChunk = mock(ChunkHeader.class);
    when(fileHeader.hasNext()).thenReturn(true, true, true, false);
    when(fileHeader.next()).thenThrow(malformedChunk).thenReturn(firstChunk).thenReturn(secondChunk).thenReturn(null);

    // First chunk carries a single record, second chunk carries two.
    when(firstChunk.hasNext()).thenReturn(true, false);
    when(firstChunk.next()).thenReturn(firstRecord).thenReturn(null);
    when(secondChunk.hasNext()).thenReturn(true, true, false);
    when(secondChunk.next()).thenReturn(secondRecord).thenReturn(thirdRecord).thenReturn(null);

    parseEvtx.processRecordGranularity(session, componentLog, flowFile, basename, in);

    verify(malformedChunkHandler).handle(flowFile, session, parseEvtx.getName(basename, chunkNum, null, ParseEvtx.EVTX_EXTENSION), badChunk);
    verify(firstHandler).handle(firstRootNode);
    verify(firstHandler).close();
    verify(secondHandler).handle(secondRootNode);
    verify(secondHandler).close();
    verify(thirdHandler).handle(thirdRootNode);
    verify(thirdHandler).close();
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) RootNode(org.apache.nifi.processors.evtx.parser.bxml.RootNode) FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) OutputStream(java.io.OutputStream) ChunkHeader(org.apache.nifi.processors.evtx.parser.ChunkHeader) Record(org.apache.nifi.processors.evtx.parser.Record) Mockito.anyString(org.mockito.Mockito.anyString) MalformedChunkException(org.apache.nifi.processors.evtx.parser.MalformedChunkException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) Test(org.junit.Test)

Example 92 with ProcessSession

use of org.apache.nifi.processor.ProcessSession in project nifi by apache.

the class ResultProcessorTest method testProcessResultFileFalure.

/**
 * Verifies the failure path of {@code resultProcessor.process}: when an exception is supplied,
 * the FlowFile gets its filename and MIME type attributes set, is transferred to the failure
 * relationship, and the error is logged together with the exception.
 */
@Test
public void testProcessResultFileFalure() {
    final String fileName = "name";
    final Exception failure = new Exception();
    final ProcessSession session = mock(ProcessSession.class);
    final ComponentLog log = mock(ComponentLog.class);
    final FlowFile input = mock(FlowFile.class);

    // putAttribute returns the same FlowFile so chained attribute updates keep operating on it.
    when(session.putAttribute(eq(input), anyString(), anyString())).thenReturn(input);

    resultProcessor.process(session, log, input, failure, fileName);

    final String expectedMimeType = MediaType.APPLICATION_XML_UTF_8.toString();
    verify(session).putAttribute(input, CoreAttributes.FILENAME.key(), fileName);
    verify(session).putAttribute(input, CoreAttributes.MIME_TYPE.key(), expectedMimeType);
    verify(session).transfer(input, failureRelationship);
    verify(log).error(eq(ResultProcessor.UNABLE_TO_PROCESS_DUE_TO), any(Object[].class), eq(failure));
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) FlowFile(org.apache.nifi.flowfile.FlowFile) Matchers.anyString(org.mockito.Matchers.anyString) ComponentLog(org.apache.nifi.logging.ComponentLog) Test(org.junit.Test)

Example 93 with ProcessSession

use of org.apache.nifi.processor.ProcessSession in project nifi by apache.

the class AbstractListProcessor method onTrigger.

/**
 * Runs one listing cycle.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>If no listing/processing timestamp is held in memory, or this node was just elected primary,
 *       reload timestamps and processed identifiers from the state manager.</li>
 *   <li>Call {@code performListing} with the minimum timestamp to list.</li>
 *   <li>Group the entities considered new by timestamp in a sorted map.</li>
 *   <li>Decide, based on the target system's timestamp precision and the listing lag, whether the
 *       newest timestamp must be held back or the whole cycle skipped.</li>
 *   <li>Create one FlowFile per remaining entity, route it to {@code REL_SUCCESS}, commit the session,
 *       and persist the updated state.</li>
 * </ol>
 * The method yields the context and returns early when state cannot be read, when listing fails,
 * when the listing is null/empty, or when there is nothing new to emit yet.
 *
 * @param context the process context, used for properties, state access and yielding
 * @param session the session used to create, attribute, transfer and commit FlowFiles
 * @throws ProcessException declared by the overridden signature; not thrown directly in this body
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Start from the in-memory timestamp of the last listing; may be replaced by the stored value below.
    Long minTimestampToListMillis = lastListedLatestEntryTimestampMillis;
    if (this.lastListedLatestEntryTimestampMillis == null || this.lastProcessedLatestEntryTimestampMillis == null || justElectedPrimaryNode) {
        try {
            // Attempt to retrieve state from the state manager if a last listing was not yet established or
            // if just elected the primary node
            final StateMap stateMap = context.getStateManager().getState(getStateScope(context));
            // Identifiers are rebuilt from the stored state, so drop whatever is held in memory.
            latestIdentifiersProcessed.clear();
            for (Map.Entry<String, String> state : stateMap.toMap().entrySet()) {
                final String k = state.getKey();
                final String v = state.getValue();
                // Entries without a value carry no information; skip them.
                if (v == null || v.isEmpty()) {
                    continue;
                }
                if (LATEST_LISTED_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    minTimestampToListMillis = Long.parseLong(v);
                    // If our determined timestamp is the same as that of our last listing, skip this execution as there are no updates
                    if (minTimestampToListMillis.equals(this.lastListedLatestEntryTimestampMillis)) {
                        context.yield();
                        return;
                    } else {
                        this.lastListedLatestEntryTimestampMillis = minTimestampToListMillis;
                    }
                } else if (LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    this.lastProcessedLatestEntryTimestampMillis = Long.parseLong(v);
                } else if (k.startsWith(IDENTIFIER_PREFIX)) {
                    // Any key with the identifier prefix stores one previously processed identifier as its value.
                    latestIdentifiersProcessed.add(v);
                }
            }
            // State has been reloaded; the "just elected" condition is consumed.
            justElectedPrimaryNode = false;
        } catch (final IOException ioe) {
            // NOTE(review): the caught exception is not passed to the logger, so its message/stack trace is lost — confirm this is intended.
            getLogger().error("Failed to retrieve timestamp of last listing from the State Manager. Will not perform listing until this is accomplished.");
            context.yield();
            return;
        }
    }
    final List<T> entityList;
    final long currentRunTimeNanos = System.nanoTime();
    final long currentRunTimeMillis = System.currentTimeMillis();
    try {
        // The run times above are captured before the listing is performed, so we keep
        // track of when this last executed for consideration of the lag nanos
        entityList = performListing(context, minTimestampToListMillis);
    } catch (final IOException e) {
        getLogger().error("Failed to perform listing on remote host due to {}", e);
        context.yield();
        return;
    }
    // Nothing was listed at all: yield without touching any state.
    if (entityList == null || entityList.isEmpty()) {
        context.yield();
        return;
    }
    Long latestListedEntryTimestampThisCycleMillis = null;
    final TreeMap<Long, List<T>> orderedEntries = new TreeMap<>();
    // Build a sorted map to determine the latest possible entries
    // While iterating, also observe whether any timestamp carries sub-second or sub-minute digits;
    // this is only used when the precision is set to auto-detect further down.
    boolean targetSystemHasMilliseconds = false;
    boolean targetSystemHasSeconds = false;
    for (final T entity : entityList) {
        final long entityTimestampMillis = entity.getTimestamp();
        if (!targetSystemHasMilliseconds) {
            targetSystemHasMilliseconds = entityTimestampMillis % 1000 > 0;
        }
        if (!targetSystemHasSeconds) {
            targetSystemHasSeconds = entityTimestampMillis % 60_000 > 0;
        }
        // New entries are all those that occur at or after the associated timestamp
        // Note that && binds tighter than ||: with no minimum timestamp every entity is new; otherwise it must be
        // at or after both the minimum timestamp and the last processed timestamp.
        // NOTE(review): the comparison unboxes lastProcessedLatestEntryTimestampMillis; if that field can still be null
        // here while minTimestampToListMillis is non-null this would throw a NullPointerException — verify against the field's initial value.
        final boolean newEntry = minTimestampToListMillis == null || entityTimestampMillis >= minTimestampToListMillis && entityTimestampMillis >= lastProcessedLatestEntryTimestampMillis;
        if (newEntry) {
            // Bucket the entity under its timestamp, creating the bucket on first use.
            List<T> entitiesForTimestamp = orderedEntries.get(entity.getTimestamp());
            if (entitiesForTimestamp == null) {
                entitiesForTimestamp = new ArrayList<T>();
                orderedEntries.put(entity.getTimestamp(), entitiesForTimestamp);
            }
            entitiesForTimestamp.add(entity);
        }
    }
    int flowfilesCreated = 0;
    if (orderedEntries.size() > 0) {
        // The TreeMap is sorted by timestamp, so the last key is the newest timestamp seen this cycle.
        latestListedEntryTimestampThisCycleMillis = orderedEntries.lastKey();
        // Determine target system time precision.
        String specifiedPrecision = context.getProperty(TARGET_SYSTEM_TIMESTAMP_PRECISION).getValue();
        if (StringUtils.isBlank(specifiedPrecision)) {
            // If TARGET_SYSTEM_TIMESTAMP_PRECISION is not supported by the Processor, then specifiedPrecision can be null, instead of its default value.
            specifiedPrecision = getDefaultTimePrecision();
        }
        // Auto-detect picks the finest unit observed in the listed timestamps; otherwise map the configured value,
        // falling back to minutes for anything that is neither millis nor seconds.
        final TimeUnit targetSystemTimePrecision = PRECISION_AUTO_DETECT.getValue().equals(specifiedPrecision) ? targetSystemHasMilliseconds ? TimeUnit.MILLISECONDS : targetSystemHasSeconds ? TimeUnit.SECONDS : TimeUnit.MINUTES : PRECISION_MILLIS.getValue().equals(specifiedPrecision) ? TimeUnit.MILLISECONDS : PRECISION_SECONDS.getValue().equals(specifiedPrecision) ? TimeUnit.SECONDS : TimeUnit.MINUTES;
        final Long listingLagMillis = LISTING_LAG_MILLIS.get(targetSystemTimePrecision);
        // If the newest timestamp of this cycle equals the one recorded for the previous listing,
        // another iteration has occurred without new files and special handling is needed to avoid starvation
        if (latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis)) {
            /* We need to wait for another cycle when either:
                 *   - If we have not eclipsed the minimal listing lag needed due to being triggered too soon after the last run
                 *   - The latest listed entity timestamp is equal to the last processed time, meaning we handled those items originally passed over. No need to process it again.
                 */
            final long listingLagNanos = TimeUnit.MILLISECONDS.toNanos(listingLagMillis);
            // The second condition additionally requires every entity at the newest timestamp to be among the identifiers already processed.
            if (currentRunTimeNanos - lastRunTimeNanos < listingLagNanos || (latestListedEntryTimestampThisCycleMillis.equals(lastProcessedLatestEntryTimestampMillis) && orderedEntries.get(latestListedEntryTimestampThisCycleMillis).stream().allMatch(entity -> latestIdentifiersProcessed.contains(entity.getIdentifier())))) {
                context.yield();
                return;
            }
        } else {
            // Convert minimum reliable timestamp into target system time unit, in order to truncate unreliable digits.
            final long minimumReliableTimestampInFilesystemTimeUnit = targetSystemTimePrecision.convert(currentRunTimeMillis - listingLagMillis, TimeUnit.MILLISECONDS);
            final long minimumReliableTimestampMillis = targetSystemTimePrecision.toMillis(minimumReliableTimestampInFilesystemTimeUnit);
            // The minimum timestamp should be reliable to determine that no further entries will be added with the same timestamp based on the target system time precision.
            if (minimumReliableTimestampMillis < latestListedEntryTimestampThisCycleMillis) {
                // Otherwise, newest entries are held back one cycle to avoid issues in writes occurring exactly when the listing is being performed to avoid missing data
                orderedEntries.remove(latestListedEntryTimestampThisCycleMillis);
            }
        }
        // Emit FlowFiles in ascending timestamp order.
        for (Map.Entry<Long, List<T>> timestampEntities : orderedEntries.entrySet()) {
            List<T> entities = timestampEntities.getValue();
            if (timestampEntities.getKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // Filter out previously processed entities.
                entities = entities.stream().filter(entity -> !latestIdentifiersProcessed.contains(entity.getIdentifier())).collect(Collectors.toList());
            }
            for (T entity : entities) {
                // Create the FlowFile for this path.
                final Map<String, String> attributes = createAttributes(entity, context);
                FlowFile flowFile = session.create();
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.transfer(flowFile, REL_SUCCESS);
                flowfilesCreated++;
            }
        }
    }
    // As long as we have a listing timestamp, there is meaningful state to capture regardless of any outputs generated
    if (latestListedEntryTimestampThisCycleMillis != null) {
        boolean processedNewFiles = flowfilesCreated > 0;
        if (processedNewFiles) {
            // Use orderedEntries.lastKey() here rather than latestListedEntryTimestampThisCycleMillis,
            // because latestListedEntryTimestampThisCycleMillis might be removed if it's not old enough.
            if (!orderedEntries.lastKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // If the latest timestamp at this cycle becomes different than the previous one, we need to clear identifiers.
                // If it didn't change, we need to add identifiers.
                latestIdentifiersProcessed.clear();
            }
            // Capture latestIdentifierProcessed.
            latestIdentifiersProcessed.addAll(orderedEntries.lastEntry().getValue().stream().map(T::getIdentifier).collect(Collectors.toList()));
            lastProcessedLatestEntryTimestampMillis = orderedEntries.lastKey();
            getLogger().info("Successfully created listing with {} new objects", new Object[] { flowfilesCreated });
            session.commit();
        }
        // Remember when this cycle started so the next cycle can evaluate the listing lag.
        lastRunTimeNanos = currentRunTimeNanos;
        if (!latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis) || processedNewFiles) {
            // The in-memory timestamp is updated before persisting, so that if persisting fails (only a warning is logged)
            // the node can continue to run with the in-memory value.
            try {
                lastListedLatestEntryTimestampMillis = latestListedEntryTimestampThisCycleMillis;
                persist(latestListedEntryTimestampThisCycleMillis, lastProcessedLatestEntryTimestampMillis, latestIdentifiersProcessed, context.getStateManager(), getStateScope(context));
            } catch (final IOException ioe) {
                getLogger().warn("Unable to save state due to {}. If NiFi is restarted before state is saved, or " + "if another node begins executing this Processor, data duplication may occur.", ioe);
            }
        }
    } else {
        // No entity qualified as new this cycle.
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();
        // lastListingTime = 0 so that we don't continually poll the distributed cache / local file system
        if (lastListedLatestEntryTimestampMillis == null) {
            lastListedLatestEntryTimestampMillis = 0L;
        }
        return;
    }
}
Also used : Serializer(org.apache.nifi.distributed.cache.client.Serializer) HashMap(java.util.HashMap) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) PrimaryNodeState(org.apache.nifi.annotation.notification.PrimaryNodeState) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) Map(java.util.Map) DistributedMapCacheClient(org.apache.nifi.distributed.cache.client.DistributedMapCacheClient) JsonParseException(com.fasterxml.jackson.core.JsonParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) OutputStream(java.io.OutputStream) Properties(java.util.Properties) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) StringUtils(org.apache.nifi.util.StringUtils) FileInputStream(java.io.FileInputStream) AllowableValue(org.apache.nifi.components.AllowableValue) Collectors(java.util.stream.Collectors) StateMap(org.apache.nifi.components.state.StateMap) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) Deserializer(org.apache.nifi.distributed.cache.client.Deserializer) TimeUnit(java.util.concurrent.TimeUnit) Stateful(org.apache.nifi.annotation.behavior.Stateful) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) List(java.util.List) TreeMap(java.util.TreeMap) OnPrimaryNodeStateChange(org.apache.nifi.annotation.notification.OnPrimaryNodeStateChange) DeserializationException(org.apache.nifi.distributed.cache.client.exception.DeserializationException) SerializationException(org.apache.nifi.distributed.cache.client.exception.SerializationException) 
AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) Collections(java.util.Collections) FlowFile(org.apache.nifi.flowfile.FlowFile) StateMap(org.apache.nifi.components.state.StateMap) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TimeUnit(java.util.concurrent.TimeUnit) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap) TreeMap(java.util.TreeMap)

Example 94 with ProcessSession

use of org.apache.nifi.processor.ProcessSession in project nifi by apache.

the class PartialFunctions method onTrigger.

/**
 * Creates a session from {@code sessionFactory}, runs {@code onTrigger} against it and commits.
 * If anything is thrown, the failure is logged, {@code rollbackSession} is asked to roll the
 * session back, and the same throwable is rethrown to the caller.
 *
 * @param context         the process context (not used by this method's body)
 * @param sessionFactory  factory for the session the work runs in
 * @param logger          receives the error message on failure
 * @param onTrigger       the work to execute with the created session
 * @param rollbackSession strategy invoked with the session and the failure before rethrowing
 * @throws ProcessException as declared; whatever the work or commit throws is propagated unchanged
 */
public static void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory, ComponentLog logger, OnTrigger onTrigger, RollbackSession rollbackSession) throws ProcessException {
    final ProcessSession processSession = sessionFactory.createSession();
    try {
        onTrigger.execute(processSession);
        processSession.commit();
    } catch (final Throwable failure) {
        // Log first, then delegate the rollback, then let the original failure propagate.
        final Object[] logArguments = { onTrigger, failure };
        logger.error("{} failed to process due to {}; rolling back session", logArguments);
        rollbackSession.rollback(processSession, failure);
        throw failure;
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession)

Example 95 with ProcessSession

use of org.apache.nifi.processor.ProcessSession in project nifi by apache.

the class AbstractPort method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ProcessSession session = sessionFactory.createSession();
    try {
        onTrigger(context, session);
        session.commit();
    } catch (final ProcessException e) {
        session.rollback();
        throw e;
    } catch (final Throwable t) {
        session.rollback();
        throw new RuntimeException(t);
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) ProcessException(org.apache.nifi.processor.exception.ProcessException)

Aggregations

ProcessSession (org.apache.nifi.processor.ProcessSession)129 FlowFile (org.apache.nifi.flowfile.FlowFile)96 ProcessContext (org.apache.nifi.processor.ProcessContext)55 IOException (java.io.IOException)54 ProcessException (org.apache.nifi.processor.exception.ProcessException)51 Test (org.junit.Test)47 Relationship (org.apache.nifi.processor.Relationship)45 List (java.util.List)42 ArrayList (java.util.ArrayList)41 Map (java.util.Map)39 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)39 ComponentLog (org.apache.nifi.logging.ComponentLog)39 HashSet (java.util.HashSet)38 Set (java.util.Set)38 HashMap (java.util.HashMap)35 Collections (java.util.Collections)33 CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription)33 Tags (org.apache.nifi.annotation.documentation.Tags)33 InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement)31 MockFlowFile (org.apache.nifi.util.MockFlowFile)31