
Example 6 with ProcessException

Use of org.apache.nifi.processor.exception.ProcessException in project kylo by Teradata.

The class GetFeedMetadata, method onTrigger:

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String categoryName = context.getProperty(CATEGORY_NAME).evaluateAttributeExpressions(flowFile).getValue();
    String feedName = context.getProperty(FEED_NAME).evaluateAttributeExpressions(flowFile).getValue();
    getLog().debug("Triggered for {}.{}", new Object[] { categoryName, feedName });
    String feedJson;
    try {
        feedJson = cachedFeed.get(new FeedKey(categoryName, feedName));
    } catch (Exception e) {
        getLog().error("Failure retrieving metadata for feed: {}.{}", new Object[] { categoryName, feedName }, e);
        throw new IllegalStateException("Failed to retrieve feed metadata", e);
    }
    if (feedJson == null) {
        throw new IllegalStateException(String.format("Failed to retrieve feed metadata for feed %s:%s", categoryName, feedName));
    }
    // Create attributes for FlowFile
    Map<String, String> attributes = Maps.newHashMap();
    attributes.put("feedJson", feedJson);
    // Create a FlowFile from the event
    flowFile = session.putAllAttributes(flowFile, attributes);
    getLog().trace("Transferring flow file to Success relationship");
    session.transfer(flowFile, REL_SUCCESS);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), ProcessException (org.apache.nifi.processor.exception.ProcessException)
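
The lookup cachedFeed.get(new FeedKey(categoryName, feedName)) relies on a cache field that is not shown in this snippet. As a rough sketch only, a Guava LoadingCache keyed by category and feed name could be wired like the following; the key type, expiry, and loader body here are assumptions for illustration, not the Kylo implementation:

import java.util.concurrent.TimeUnit;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class FeedMetadataCacheSketch {

    // Hypothetical loader: the real processor would call the Kylo metadata REST client here.
    static LoadingCache<String, String> buildCache() {
        return CacheBuilder.newBuilder()
                .expireAfterWrite(60, TimeUnit.SECONDS) // expiry chosen purely for illustration
                .build(new CacheLoader<String, String>() {
                    @Override
                    public String load(String categoryDotFeed) {
                        // Placeholder JSON; stands in for the feed metadata returned by the service.
                        return "{\"feed\":\"" + categoryDotFeed + "\"}";
                    }
                });
    }

    public static void main(String[] args) throws Exception {
        LoadingCache<String, String> cachedFeed = buildCache();
        System.out.println(cachedFeed.get("movies.new_releases"));
    }
}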

Example 7 with ProcessException

Use of org.apache.nifi.processor.exception.ProcessException in project kylo by Teradata.

The class TriggerCleanup, method onTrigger:

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session) throws ProcessException {
    getLog().trace("Triggered for feed {}.{}", new Object[] { category, feed });
    // Look for an event to process
    FeedCleanupTriggerEvent event = queue.poll();
    if (event == null) {
        getLog().trace("Triggered, but no message in queue");
        context.yield();
        // nothing to do
        return;
    }
    String feedId;
    try {
        feedId = getMetadataService(context).getProvider().getFeedId(category, feed);
        getLog().debug("Triggered for feed " + feedId);
    } catch (Exception e) {
        getLog().error("Failure retrieving metadata for feed: {}.{}", new Object[] { category, feed }, e);
        throw new IllegalStateException("Failed to retrieve feed metadata", e);
    }
    // Verify feed properties
    Properties properties = (feedId != null) ? getMetadataService(context).getProvider().getFeedProperties(feedId) : null;
    getLog().debug("Feed properties " + properties);
    if (properties == null) {
        throw new IllegalStateException("Failed to fetch properties for feed: " + feedId);
    }
    if (!properties.containsKey(FeedProperties.CLEANUP_ENABLED) || !"true".equals(properties.getProperty(FeedProperties.CLEANUP_ENABLED))) {
        getLog().info("Ignoring cleanup event because deleteEnabled is false for feed: {}", new Object[] { feedId });
        context.yield();
        // ignore events if deleteEnabled is not true
        return;
    }
    // Create attributes for FlowFile
    Map<String, String> attributes = Maps.newHashMap();
    for (Map.Entry<Object, Object> property : properties.entrySet()) {
        attributes.put((String) property.getKey(), (String) property.getValue());
    }
    attributes.put("category", context.getProperty(CATEGORY_NAME).getValue());
    attributes.put("feed", context.getProperty(FEED_NAME).getValue());
    // Create a FlowFile from the event
    FlowFile flowFile = session.create();
    flowFile = session.putAllAttributes(flowFile, attributes);
    getLog().debug("Transferring flow file to Success relationship");
    session.transfer(flowFile, REL_SUCCESS);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), FeedCleanupTriggerEvent (com.thinkbiganalytics.metadata.rest.model.event.FeedCleanupTriggerEvent), Properties (java.util.Properties), FeedProperties (com.thinkbiganalytics.metadata.api.feed.FeedProperties), Map (java.util.Map), ProcessException (org.apache.nifi.processor.exception.ProcessException)
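
A side note on the attribute-copying loop above: iterating properties.entrySet() requires casting each key and value to String. A small sketch of an alternative that avoids the casts by using Properties.stringPropertyNames(); the key used in main is illustrative, not a Kylo constant:

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class PropertiesToAttributesSketch {

    // Copies the String-valued entries of a Properties object into a Map suitable for
    // ProcessSession.putAllAttributes, without unchecked casts.
    static Map<String, String> toAttributes(Properties properties) {
        Map<String, String> attributes = new HashMap<>();
        for (String name : properties.stringPropertyNames()) {
            attributes.put(name, properties.getProperty(name));
        }
        return attributes;
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("cleanup.enabled", "true"); // illustrative key only
        System.out.println(toAttributes(props));      // prints {cleanup.enabled=true}
    }
}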

Example 8 with ProcessException

Use of org.apache.nifi.processor.exception.ProcessException in project kylo by Teradata.

The class ExecutePySpark, method onTrigger:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}", new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES).evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS).evaluateAttributeExpressions(flowFile).getValue();
        PySparkUtils pySparkUtils = new PySparkUtils();
        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }
        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }
        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}", new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }
        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab) && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }
        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);
                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");
                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error("Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                    try {
                        logger.info("User authentication initiated");
                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger, hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user :" + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });
        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}", new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }
        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });
        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }
        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }
        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome).setConf(SparkLauncher.DRIVER_MEMORY, driverMemory).setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory).setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances).setConf(SparkLauncher.EXECUTOR_CORES, executorCores).setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);
        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });
        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }
        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }
        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}", new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }
        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();
        logger.info("Waiting for PySpark job to complete");
        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: InputStreamReaderRunnable (com.thinkbiganalytics.nifi.util.InputStreamReaderRunnable), FlowFile (org.apache.nifi.flowfile.FlowFile), Configuration (org.apache.hadoop.conf.Configuration), ApplySecurityPolicy (com.thinkbiganalytics.nifi.security.ApplySecurityPolicy), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), ProcessException (org.apache.nifi.processor.exception.ProcessException), PySparkUtils (com.thinkbiganalytics.nifi.pyspark.utils.PySparkUtils), SparkLauncher (org.apache.spark.launcher.SparkLauncher)
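
One detail in the additional-config loop above: split("=") splits on every "=", so an option whose value itself contains "=" (for example a spark.driver.extraJavaOptions value carrying -D flags) yields more than two parts and is silently skipped. A hedged alternative, not taken from the Kylo source, is to split only on the first "=":

public class SparkConfSplitSketch {
    public static void main(String[] args) {
        // Example option whose value contains '='; chosen for illustration only.
        String option = "spark.driver.extraJavaOptions=-Dlog.level=DEBUG";
        // A limit of 2 keeps everything after the first '=' as the value.
        String[] confKeyValue = option.split("=", 2);
        if (confKeyValue.length == 2) {
            // In the processor this would be: pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
            System.out.println(confKeyValue[0] + " -> " + confKeyValue[1]);
        }
    }
}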

Example 9 with ProcessException

Use of org.apache.nifi.processor.exception.ProcessException in project kylo by Teradata.

The class MetadataClientRecorder, method startFeedInitialization:

/* (non-Javadoc)
     * @see com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder#startFeedInitialization(java.lang.String)
     */
@Override
public InitializationStatus startFeedInitialization(String feedId) {
    InitializationStatus status = new InitializationStatus(InitializationStatus.State.IN_PROGRESS);
    try {
        this.client.updateCurrentInitStatus(feedId, status);
        getInitStatusCache().put(feedId, Optional.of(status));
        return status;
    } catch (Exception e) {
        log.error("Failed to update metadata with feed initialization in-progress status: {},  feed: {}", status.getState(), feedId, e);
        getInitStatusCache().invalidate(feedId);
        throw new ProcessException("Failed to update metadata with feed initialization in-progress status: " + status + ",  feed: " + feedId, e);
    }
}
Also used: ProcessException (org.apache.nifi.processor.exception.ProcessException), InitializationStatus (com.thinkbiganalytics.metadata.rest.model.feed.InitializationStatus), WaterMarkActiveException (com.thinkbiganalytics.nifi.core.api.metadata.WaterMarkActiveException), ActiveWaterMarksCancelledException (com.thinkbiganalytics.nifi.core.api.metadata.ActiveWaterMarksCancelledException), ExecutionException (java.util.concurrent.ExecutionException)

Example 10 with ProcessException

Use of org.apache.nifi.processor.exception.ProcessException in project kylo by Teradata.

The class MetadataClientRecorder, method completeFeedInitialization:

/* (non-Javadoc)
     * @see com.thinkbiganalytics.nifi.core.api.metadata.MetadataRecorder#completeFeedInitialization(java.lang.String)
     */
@Override
public InitializationStatus completeFeedInitialization(String feedId) {
    InitializationStatus status = new InitializationStatus(InitializationStatus.State.SUCCESS);
    try {
        this.client.updateCurrentInitStatus(feedId, status);
        getInitStatusCache().put(feedId, Optional.of(status));
        return status;
    } catch (Exception e) {
        log.error("Failed to update metadata with feed initialization completion status: {},  feed: {}", status.getState(), feedId, e);
        getInitStatusCache().invalidate(feedId);
        throw new ProcessException("Failed to update metadata with feed initialization completion status: " + status + ",  feed: " + feedId, e);
    }
}
Also used: ProcessException (org.apache.nifi.processor.exception.ProcessException), InitializationStatus (com.thinkbiganalytics.metadata.rest.model.feed.InitializationStatus), WaterMarkActiveException (com.thinkbiganalytics.nifi.core.api.metadata.WaterMarkActiveException), ActiveWaterMarksCancelledException (com.thinkbiganalytics.nifi.core.api.metadata.ActiveWaterMarksCancelledException), ExecutionException (java.util.concurrent.ExecutionException)
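
Both recorder methods wrap the underlying failure in a ProcessException, which is the exception type NiFi processors are expected to surface from onTrigger. As a rough illustration of how a calling processor might handle it, here is a hypothetical processor sketch, not Kylo code; the Recorder interface, the "feedId" attribute name, and the relationship names are stand-ins:

import java.util.Set;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;

public class RecordFeedInitializationSketch extends AbstractProcessor {

    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").build();

    // Stand-in for the injected MetadataRecorder / MetadataClientRecorder dependency.
    interface Recorder {
        void startFeedInitialization(String feedId);
    }

    private Recorder recorder;

    @Override
    public Set<Relationship> getRelationships() {
        return Set.of(REL_SUCCESS, REL_FAILURE);
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        try {
            // The "feedId" attribute name is an assumption for this sketch.
            recorder.startFeedInitialization(flowFile.getAttribute("feedId"));
            session.transfer(flowFile, REL_SUCCESS);
        } catch (ProcessException e) {
            getLogger().error("Failed to record feed initialization", e);
            session.transfer(session.penalize(flowFile), REL_FAILURE);
        }
    }
}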

Aggregations

ProcessException (org.apache.nifi.processor.exception.ProcessException): 274
FlowFile (org.apache.nifi.flowfile.FlowFile): 169
IOException (java.io.IOException): 162
InputStream (java.io.InputStream): 79
HashMap (java.util.HashMap): 78
ComponentLog (org.apache.nifi.logging.ComponentLog): 78
OutputStream (java.io.OutputStream): 62
ArrayList (java.util.ArrayList): 55
Map (java.util.Map): 52
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 39
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 38
StopWatch (org.apache.nifi.util.StopWatch): 37
HashSet (java.util.HashSet): 36
ProcessSession (org.apache.nifi.processor.ProcessSession): 35
Relationship (org.apache.nifi.processor.Relationship): 33
List (java.util.List): 31
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 29
AtomicReference (java.util.concurrent.atomic.AtomicReference): 28
Set (java.util.Set): 26
ProcessContext (org.apache.nifi.processor.ProcessContext): 25