
Example 11 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

Class GetFeedsHistoryReindex, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
    }
    logger.debug("Checking for feeds requiring reindexing historical data");
    try {
        MetadataProviderService metadataProviderService = getMetadataService(context);
        if ((metadataProviderService != null) && (metadataProviderService.getProvider() != null)) {
            String dateTimeOfCheck = String.valueOf(DateTime.now(DateTimeZone.UTC));
            FeedsForDataHistoryReindex feedsForHistoryReindexing = getMetadataService(context).getProvider().getFeedsForHistoryReindexing();
            if (feedsForHistoryReindexing != null) {
                logger.info("Found {} feeds requiring reindexing historical data", new Object[] { feedsForHistoryReindexing.getFeeds().size() });
                if (feedsForHistoryReindexing.getFeedCount() > 0) {
                    for (Feed feedForHistoryReindexing : feedsForHistoryReindexing.getFeeds()) {
                        Map<String, String> attributes = new HashMap<>();
                        attributes.put(FEED_ID_FOR_HISTORY_REINDEX_KEY, feedForHistoryReindexing.getId());
                        attributes.put(FEED_SYSTEM_NAME_FOR_HISTORY_REINDEX_KEY, feedForHistoryReindexing.getSystemName());
                        attributes.put(FEED_CATEGORY_SYSTEM_NAME_FOR_HISTORY_REINDEX_KEY, feedForHistoryReindexing.getCategory().getSystemName());
                        attributes.put(FEED_STATUS_FOR_HISTORY_REINDEX_KEY, feedForHistoryReindexing.getCurrentHistoryReindexingStatus().getHistoryReindexingState().toString());
                        attributes.put(FEED_LAST_MODIFIED_UTC_FOR_HISTORY_REINDEX_KEY, feedForHistoryReindexing.getCurrentHistoryReindexingStatus().getLastModifiedTimestamp().toString());
                        attributes.put(FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY, String.valueOf(feedsForHistoryReindexing.getFeedCount()));
                        attributes.put(FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY, feedsForHistoryReindexing.getFeedIds().toString());
                        attributes.put(FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY, dateTimeOfCheck);
                        // all attributes from parent flow file copied except uuid, creates a FORK event
                        FlowFile feedFlowFile = session.create(flowFile);
                        feedFlowFile = session.putAllAttributes(feedFlowFile, attributes);
                        session.transfer(feedFlowFile, REL_FOUND);
                        logger.info("Flow file created for reindexing feed's historical data: feed id {}, category name {}, feed name {}", new Object[] { FEED_ID_FOR_HISTORY_REINDEX_KEY, FEED_CATEGORY_SYSTEM_NAME_FOR_HISTORY_REINDEX_KEY, FEED_SYSTEM_NAME_FOR_HISTORY_REINDEX_KEY });
                    }
                    flowFile = session.putAttribute(flowFile, FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY, String.valueOf(feedsForHistoryReindexing.getFeedCount()));
                    flowFile = session.putAttribute(flowFile, FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY, dateTimeOfCheck);
                    // only for found case
                    session.transfer(flowFile, REL_ORIGINAL);
                } else {
                    // this will always be 0 here
                    flowFile = session.putAttribute(flowFile, FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY, String.valueOf(feedsForHistoryReindexing.getFeedCount()));
                    // this will always be empty list here
                    flowFile = session.putAttribute(flowFile, FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY, feedsForHistoryReindexing.getFeedIds().toString());
                    flowFile = session.putAttribute(flowFile, FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY, dateTimeOfCheck);
                    session.transfer(flowFile, REL_NOT_FOUND);
                }
            }
        } else {
            logger.error("Error checking for feeds requiring reindexing historical data. Check if Kylo services is running, and accessible from NiFi.");
            session.transfer(flowFile, REL_FAILURE);
        }
    } catch (Exception e) {
        logger.error("An exception was thrown during check for feeds requiring reindexing historical data: {}", new Object[] { e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) FeedsForDataHistoryReindex(com.thinkbiganalytics.metadata.rest.model.feed.reindex.FeedsForDataHistoryReindex) ComponentLog(org.apache.nifi.logging.ComponentLog) MetadataProviderService(com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService) Feed(com.thinkbiganalytics.metadata.rest.model.feed.Feed)
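
The core of the example above is NiFi's parent/child fork: session.create(parent) copies every attribute of the parent flow file except uuid and records a FORK provenance event, after which each child receives its own attributes and relationship. Below is a minimal, standalone sketch of that pattern; it is not part of Kylo, and the class and method names are illustrative only.

import java.util.List;
import java.util.Map;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;

final class ForkSketch {

    /**
     * Forks one child flow file per attribute map and routes it to the given relationship.
     * session.create(parent) copies the parent's attributes (except uuid) and emits a FORK
     * provenance event; putAllAttributes returns the updated child reference.
     */
    static void forkChildren(ProcessSession session, FlowFile parent,
                             List<Map<String, String>> perChildAttributes,
                             Relationship childRelationship) {
        for (Map<String, String> attributes : perChildAttributes) {
            FlowFile child = session.create(parent);
            child = session.putAllAttributes(child, attributes);
            session.transfer(child, childRelationship);
        }
    }
}

As in the processor above, the parent flow file is still transferred separately (there to REL_ORIGINAL) once all children have been created.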

Example 12 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

Class ExecutePySpark, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES).evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS).evaluateAttributeExpressions(flowFile).getValue();
        PySparkUtils pySparkUtils = new PySparkUtils();
        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }
        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }
        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}", new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }
        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab) && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }
        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);
                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");
                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error("Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                    try {
                        logger.info("User authentication initiated");
                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger, hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user :" + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });
        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }
        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });
        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }
        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }
        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome).setConf(SparkLauncher.DRIVER_MEMORY, driverMemory).setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory).setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances).setConf(SparkLauncher.EXECUTOR_CORES, executorCores).setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);
        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });
        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }
        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }
        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}", new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }
        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();
        logger.info("Waiting for PySpark job to complete");
        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : InputStreamReaderRunnable(com.thinkbiganalytics.nifi.util.InputStreamReaderRunnable) FlowFile(org.apache.nifi.flowfile.FlowFile) Configuration(org.apache.hadoop.conf.Configuration) ApplySecurityPolicy(com.thinkbiganalytics.nifi.security.ApplySecurityPolicy) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) PySparkUtils(com.thinkbiganalytics.nifi.pyspark.utils.PySparkUtils) SparkLauncher(org.apache.spark.launcher.SparkLauncher)
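
Stripped of the NiFi and Kerberos plumbing, the launch itself comes down to building a SparkLauncher, starting the child process, draining its output streams, and checking the exit code. The following is a minimal, standalone sketch under assumed paths and settings; plain threads stand in for Kylo's InputStreamReaderRunnable.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.spark.launcher.SparkLauncher;

public final class PySparkLaunchSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        Process process = new SparkLauncher()
                .setSparkHome("/opt/spark")                // assumed Spark home
                .setAppResource("/tmp/example_job.py")     // assumed PySpark script
                .setAppName("example-pyspark-job")
                .setMaster("yarn")
                .setDeployMode("cluster")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .setConf(SparkLauncher.EXECUTOR_MEMORY, "1g")
                .launch();

        // Drain stdout and stderr so the child process cannot block on a full pipe.
        drain(process.getInputStream(), "spark-stdout");
        drain(process.getErrorStream(), "spark-stderr");

        int exitCode = process.waitFor();
        System.out.println("PySpark job finished with exit code " + exitCode);
    }

    private static void drain(InputStream stream, String threadName) {
        Thread thread = new Thread(() -> {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
                reader.lines().forEach(line -> System.out.println("[" + threadName + "] " + line));
            } catch (IOException ignored) {
                // best-effort logging only
            }
        }, threadName);
        thread.setDaemon(true);
        thread.start();
    }
}

Draining both streams matters: if either pipe fills up, the spark-submit child process can stall before it ever reports an exit code, which is why the processor above starts two reader threads before calling waitFor().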

Example 13 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

Class MetadataClientRecorder, method releaseWaterMark.

/* (non-Javadoc)
     * @see com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder#releaseWaterMark(org.apache.nifi.processor.ProcessSession, org.apache.nifi.flowfile.FlowFile, java.lang.String, java.lang.String)
     */
@Override
public FlowFile releaseWaterMark(ProcessSession session, FlowFile ff, String feedId, String waterMarkName) {
    FlowFile resultFF = ff;
    Map<String, WaterMarkParam> ffWaterMarks = getCurrentWaterMarksAttr(ff);
    WaterMarkParam param = ffWaterMarks.get(waterMarkName);
    try {
        if (param != null) {
            // Update the flowfile with the modified set of active water marks.
            removeFromCurrentWaterMarksAttr(session, resultFF, waterMarkName, param.name);
            resetWaterMarkParam(session, resultFF, feedId, waterMarkName, param.name);
        } else {
            log.warn("Received request to release a water mark not found in the flow file: {}", waterMarkName);
        }
    } finally {
        // Even if water mark resetting fails we should always release the water mark.
        Long activeTimestamp = getActiveWaterMarkTimestamp(feedId, waterMarkName);
        if (activeTimestamp != null) {
            if (param == null || param.timestamp == activeTimestamp) {
                releaseActiveWaterMark(feedId, waterMarkName, activeTimestamp);
            } else if (param.timestamp != activeTimestamp) {
                // If the water mark timestamp does not match the one recorded as an active water mark this means
                // this flowfile's water mark has been canceled and another flow file should be considered the active one.
                // In this case this water mark value has been superseded and no release should occur.
                log.info("Received request to release a water mark version that is no longer active: {}", waterMarkName);
            }
        } else {
            // The water mark is not recognized as an active one.
            log.warn("Received request to release a non-active water mark: {}", waterMarkName);
        }
    }
    return resultFF;
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile)
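
The comments in the finally block describe a compare-and-release rule: the water mark is released only when the timestamp carried by the flow file still matches the registered active timestamp; otherwise another flow file has superseded it and nothing should be released. The hypothetical registry below (not the Kylo provider) captures the same idea with an atomic remove.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

final class ActiveWaterMarkRegistrySketch {

    private final ConcurrentMap<String, Long> activeTimestamps = new ConcurrentHashMap<>();

    void activate(String waterMarkKey, long timestamp) {
        activeTimestamps.put(waterMarkKey, timestamp);
    }

    /** Returns true only when the caller still owns the active water mark. */
    boolean release(String waterMarkKey, long timestamp) {
        // remove(key, value) is atomic and fails if the water mark has since been
        // re-registered with a different timestamp, i.e. this version was superseded.
        return activeTimestamps.remove(waterMarkKey, timestamp);
    }
}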

Example 14 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

Class MetadataClientRecorder, method commitAllWaterMarks.

/* (non-Javadoc)
     * @see com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder#commitAllWaterMarks(org.apache.nifi.processor.ProcessSession, org.apache.nifi.flowfile.FlowFile, java.lang.String)
     */
@Override
public FlowFile commitAllWaterMarks(ProcessSession session, FlowFile ff, String feedId) {
    FlowFile resultFF = ff;
    Set<String> cancelledWaterMarks = new HashSet<>();
    // TODO do more efficiently
    for (String waterMarkName : new HashSet<String>(getCurrentWaterMarksAttr(ff).keySet())) {
        try {
            resultFF = commitWaterMark(session, resultFF, feedId, waterMarkName);
        } catch (ActiveWaterMarksCancelledException e) {
            cancelledWaterMarks.addAll(e.getWaterMarkNames());
        }
    }
    if (cancelledWaterMarks.size() > 0) {
        throw new ActiveWaterMarksCancelledException(feedId, cancelledWaterMarks);
    } else {
        return resultFF;
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ActiveWaterMarksCancelledException(com.thinkbiganalytics.nifi.core.api.metadata.ActiveWaterMarksCancelledException) HashSet(java.util.HashSet)
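
A hedged usage sketch of how a calling processor might consume commitAllWaterMarks and its aggregated cancellation exception; it assumes the MetadataRecorder interface referenced in the @see tag above, and the relationships passed in are the caller's own.

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;

import com.thinkbiganalytics.nifi.core.api.metadata.ActiveWaterMarksCancelledException;
import com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder;

final class CommitWaterMarksSketch {

    /**
     * Commits all water marks for a feed and routes the flow file by outcome.
     * The success and failure relationships are assumed to be defined by the calling processor.
     */
    static void commitAndRoute(MetadataRecorder recorder, ProcessSession session, ComponentLog logger,
                               FlowFile flowFile, String feedId,
                               Relationship success, Relationship failure) {
        try {
            flowFile = recorder.commitAllWaterMarks(session, flowFile, feedId);
            session.transfer(flowFile, success);
        } catch (ActiveWaterMarksCancelledException e) {
            logger.warn("Water marks cancelled for feed {}: {}", new Object[] { feedId, e.getWaterMarkNames() });
            session.transfer(flowFile, failure);
        }
    }
}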

Example 15 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project kylo by Teradata.

Class SetSavepoint, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final SavepointController controller = context.getProperty(SAVEPOINT_SERVICE).asControllerService(SavepointController.class);
    final SavepointProvider provider = controller.getProvider();
    final PropertyValue pvSavepointId = context.getProperty(SAVEPOINT_ID);
    final String processorId = getIdentifier();
    FlowFile flowFile = null;
    long start = System.currentTimeMillis();
    Optional<FlowFile> nextFlowfile = getNextFlowFile(context, session, controller, provider, pvSavepointId);
    long stop = System.currentTimeMillis();
    if (!nextFlowfile.isPresent()) {
        return;
    } else {
        flowFile = nextFlowfile.get();
    }
    getLogger().info("Time to iterate over {} flow files: {} ms, {} ", new Object[] { session.getQueueSize(), (stop - start), nextFlowfile.isPresent() ? nextFlowfile.get() : " Nothing found " });
    final ComponentLog logger = getLogger();
    // We do processing on each flowfile here
    final String savepointIdStr = pvSavepointId.evaluateAttributeExpressions(flowFile).getValue();
    final String flowfileId = flowFile.getAttribute(CoreAttributes.UUID.key());
    Lock lock = null;
    try {
        lock = provider.lock(savepointIdStr);
        if (lock != null) {
            SavepointEntry entry = provider.lookupEntry(savepointIdStr);
            if (isExpired(context, session, provider, flowFile, savepointIdStr, lock)) {
                return;
            }
            String waitStartTimestamp;
            // add the processor id for the current savepoint
            // this will be used to check on the next save point if the flow file should be examined and processed.
            flowFile = session.putAttribute(flowFile, SAVEPOINT_PROCESSOR_ID, getIdentifier());
            if (entry == null || entry.getState(processorId) == null) {
                // Register new
                provider.register(savepointIdStr, processorId, flowfileId, lock);
                flowFile = tryFlowFile(session, flowFile, "-1");
                // add in timestamps
                // Set wait start timestamp if it's not set yet
                waitStartTimestamp = flowFile.getAttribute(SAVEPOINT_START_TIMESTAMP);
                if (waitStartTimestamp == null) {
                    waitStartTimestamp = String.valueOf(System.currentTimeMillis());
                    flowFile = session.putAttribute(flowFile, SAVEPOINT_START_TIMESTAMP, waitStartTimestamp);
                }
                session.transfer(flowFile);
            } else {
                SavepointEntry.SavePointState state = entry.getState(processorId);
                switch(state) {
                    case RELEASE_SUCCESS:
                        provider.commitRelease(savepointIdStr, processorId, lock);
                        // add provenance to indicate success
                        flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.RELEASE_STATUS_KEY, SavepointProvenanceProperties.RELEASE_STATUS.SUCCESS.name());
                        session.transfer(flowFile, REL_RELEASE_SUCCESS);
                        break;
                    case RELEASE_FAILURE:
                        provider.commitRelease(savepointIdStr, processorId, lock);
                        // add provenance to indicate failure
                        flowFile = session.putAttribute(flowFile, SavepointProvenanceProperties.RELEASE_STATUS_KEY, SavepointProvenanceProperties.RELEASE_STATUS.FAILURE.name());
                        session.transfer(flowFile, REL_RELEASE_FAILURE);
                        break;
                    case RETRY:
                        String retryCount = flowFile.getAttribute(SAVEPOINT_RETRY_COUNT);
                        if (retryCount == null) {
                            retryCount = "0";
                        }
                        provider.commitRetry(savepointIdStr, processorId, lock);
                        flowFile = tryFlowFile(session, flowFile, retryCount);
                        session.transfer(flowFile);
                        break;
                    case WAIT:
                        session.transfer(flowFile, REL_SELF);
                        break;
                    default:
                        logger.warn("Unexpected savepoint state.");
                        session.transfer(flowFile, REL_FAILURE);
                }
            }
        } else {
            // Lock busy so try again later
            // add it back to cache
            controller.putFlowfileBack(processorId, flowfileId);
            logger.info("Unable to obtain lock.  It is already locked by another process.  Adding back to queue {} ", new Object[] { flowfileId });
            session.transfer(flowFile, REL_SELF);
        }
    } catch (IOException | InvalidLockException | InvalidSetpointException e) {
        logger.warn("Failed to process flowfile {} for savepoint {}", new String[] { flowfileId, savepointIdStr }, e);
        flowFile = session.putAttribute(flowFile, SAVEPOINT_EXCEPTION, "Failed to process flowfile " + flowfileId + " for savepoint " + savepointIdStr + ". " + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    } finally {
        if (lock != null) {
            try {
                provider.unlock(lock);
            } catch (IOException e) {
                logger.warn("Unable to unlock {}", new String[] { savepointIdStr });
            }
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InvalidSetpointException(com.thinkbiganalytics.nifi.v2.core.savepoint.InvalidSetpointException) SavepointController(com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointController) PropertyValue(org.apache.nifi.components.PropertyValue) IOException(java.io.IOException) SavepointEntry(com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointEntry) ComponentLog(org.apache.nifi.logging.ComponentLog) Lock(com.thinkbiganalytics.nifi.v2.core.savepoint.Lock) SavepointProvider(com.thinkbiganalytics.nifi.v2.core.savepoint.SavepointProvider) InvalidLockException(com.thinkbiganalytics.nifi.v2.core.savepoint.InvalidLockException)
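
The RETRY branch relies on a helper, tryFlowFile(...), that is not shown in this excerpt. The sketch below illustrates the kind of retry-count attribute bookkeeping such a helper is assumed to perform; the attribute name and increment logic are illustrative only.

import java.util.Optional;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

final class RetryCountSketch {

    // Hypothetical attribute name; the real processor defines its own constant.
    static final String RETRY_COUNT_ATTR = "savepoint.retry.count";

    /**
     * Reads the current retry count from the flow file (defaulting to 0) and writes the
     * incremented value back. putAttribute returns a new FlowFile reference, which the
     * caller must keep using for all subsequent session operations.
     */
    static FlowFile incrementRetryCount(ProcessSession session, FlowFile flowFile) {
        int retries = Optional.ofNullable(flowFile.getAttribute(RETRY_COUNT_ATTR))
                .map(Integer::parseInt)
                .orElse(0);
        return session.putAttribute(flowFile, RETRY_COUNT_ATTR, String.valueOf(retries + 1));
    }
}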

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 500
IOException (java.io.IOException): 236
ProcessException (org.apache.nifi.processor.exception.ProcessException): 193
HashMap (java.util.HashMap): 160
InputStream (java.io.InputStream): 145
OutputStream (java.io.OutputStream): 131
ComponentLog (org.apache.nifi.logging.ComponentLog): 119
Test (org.junit.Test): 116
ArrayList (java.util.ArrayList): 113
Map (java.util.Map): 105
MockFlowFile (org.apache.nifi.util.MockFlowFile): 103
ProcessSession (org.apache.nifi.processor.ProcessSession): 99
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 83
Relationship (org.apache.nifi.processor.Relationship): 78
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 78
HashSet (java.util.HashSet): 75
List (java.util.List): 67
StopWatch (org.apache.nifi.util.StopWatch): 59
Set (java.util.Set): 56
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 55