Example 11 with MetadataProviderService

Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.

From the class ExecuteSparkJobTest, method testValidatorsForDatasources:

/**
 * Verify validators for Data Sources property.
 */
@Test
public void testValidatorsForDatasources() throws Exception {
    // Test UUID list validator
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "INVALID");
    runner.enqueue(new byte[0]);
    Set<String> results = ((MockProcessContext) runner.getProcessContext()).validate().stream()
        .map(Object::toString)
        .collect(Collectors.toSet());
    Assert.assertEquals(1, results.size());
    Assert.assertTrue(results.contains("'Data Sources' validated against 'INVALID' is invalid because not a list of UUIDs"));
    // Test missing metadata service
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "87870c7e-8ae8-4db4-9959-c2f5a9496833");
    runner.enqueue(new byte[0]);
    results = ((MockProcessContext) runner.getProcessContext()).validate().stream()
        .map(Object::toString)
        .collect(Collectors.toSet());
    Assert.assertEquals(1, results.size());
    Assert.assertTrue(results.contains("'Metadata Service' is invalid because Metadata Service is required when Data Sources is not empty"));
    // Test with one UUID
    final MetadataProviderService metadataService = new MockMetadataProviderService();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(ExecuteSparkJob.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.enqueue(new byte[0]);
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());
    // Test with two UUIDs
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "87870c7e-8ae8-4db4-9959-c2f5a9496833,e4562514-8e06-459a-8ea9-1e2630c852f9");
    runner.enqueue(new byte[0]);
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());
    // Test with expression
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "${metadata.dataTransformation.datasourceIds}");
    runner.enqueue(new byte[0], Collections.singletonMap("metadata.dataTransformation.datasourceIds", "87870c7e-8ae8-4db4-9959-c2f5a9496833"));
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());
}
Also used: MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService), MockProcessContext (org.apache.nifi.util.MockProcessContext), Test (org.junit.Test)
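
The exact validator implementation is internal to kylo, but the error message above implies a simple shape: split the property value on commas and require every token to parse as a UUID. A minimal, self-contained sketch of that check (an illustration, not kylo's actual validator code):

import java.util.UUID;

public class UuidListCheckSketch {

    /** Returns true when every comma-separated token parses as a UUID. */
    static boolean isUuidList(String value) {
        for (String token : value.split(",")) {
            try {
                UUID.fromString(token.trim());
            } catch (IllegalArgumentException e) {
                return false; // a value like "INVALID" fails here
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(isUuidList("INVALID")); // false
        System.out.println(isUuidList("87870c7e-8ae8-4db4-9959-c2f5a9496833")); // true
        System.out.println(isUuidList(
                "87870c7e-8ae8-4db4-9959-c2f5a9496833,e4562514-8e06-459a-8ea9-1e2630c852f9")); // true
    }
}

An expression such as ${metadata.dataTransformation.datasourceIds} is not a UUID list at configuration time; it is evaluated against flow file attributes at runtime, which is why the expression-based case in the test above still validates cleanly.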

Example 12 with MetadataProviderService

Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.

From the class ExecuteSparkJob, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String PROVENANCE_JOB_STATUS_KEY = "Job Status";
    String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";
    try {
        PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
        PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";
        /* Configuration parameters for spark launcher */
        String appJar = context.getProperty(APPLICATION_JAR).evaluateAttributeExpressions(flowFile).getValue().trim();
        String extraJars = context.getProperty(EXTRA_JARS).evaluateAttributeExpressions(flowFile).getValue();
        String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        String mainClass = context.getProperty(MAIN_CLASS).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        String appArgs = context.getProperty(MAIN_ARGS).evaluateAttributeExpressions(flowFile).getValue().trim();
        String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile).getValue();
        String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME).evaluateAttributeExpressions(flowFile).getValue();
        String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        String principal = context.getProperty(kerberosPrincipal).getValue();
        String keyTab = context.getProperty(kerberosKeyTab).getValue();
        String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
        String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile).getValue();
        Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT).evaluateAttributeExpressions(flowFile).asTimePeriod(TimeUnit.SECONDS).intValue();
        String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE).asControllerService(MetadataProviderService.class);
        final List<String> extraJarPaths = getExtraJarPaths(extraJars);
        // If all three fields are set, assume Kerberos is enabled and the user should be authenticated
        boolean isAuthenticated = !StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab) && !StringUtils.isEmpty(hadoopConfigurationResources);
        try {
            if (isAuthenticated && isSecurityEnabled(hadoopConfigurationResources)) {
                logger.info("Security is enabled");
                if (principal.equals("") && keyTab.equals("")) {
                    logger.error("Kerberos Principal and Kerberos KeyTab information missing in Kerboeros enabled cluster. {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
                logger.info("User authentication initiated");
                boolean authenticationStatus = new ApplySecurityPolicy().validateUserWithKerberos(logger, hadoopConfigurationResources, principal, keyTab);
                if (authenticationStatus) {
                    logger.info("User authenticated successfully.");
                } else {
                    logger.error("User authentication failed.  {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (IOException e1) {
            logger.error("Unknown exception occurred while authenticating user : {} and flow file: {}", new Object[] { e1.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (Exception unknownException) {
            logger.error("Unknown exception occurred while validating user : {}.  {} ", new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();
        // Build environment
        final Map<String, String> env = getDatasources(session, flowFile, PROVENANCE_JOB_STATUS_KEY, datasourceIds, metadataService, extraJarPaths);
        if (env == null) {
            return;
        }
        /* Launch the Spark job as a child process */
        SparkLauncher launcher = new SparkLauncher(env)
            .setAppResource(appJar)
            .setMainClass(mainClass)
            .setMaster(sparkMaster)
            .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
            .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
            .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
            .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
            .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout)
            .setSparkHome(sparkHome)
            .setAppName(sparkApplicationName);
        OptionalSparkConfigurator optionalSparkConf = new OptionalSparkConfigurator(launcher)
            .setDeployMode(sparkMaster, sparkYarnDeployMode)
            .setAuthentication(isAuthenticated, keyTab, principal)
            .addAppArgs(appArgs)
            .addSparkArg(sparkConfs)
            .addExtraJars(extraJarPaths)
            .setYarnQueue(yarnQueue)
            .setExtraFiles(extraFiles);
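        // getLaucnher() below is the method name as it appears in the kylo source (note the spelling).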
        Process spark = optionalSparkConf.getLaucnher().launch();
        /* Read/clear the process input stream */
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();
        /* Read/clear the process error stream */
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();
        logger.info("Waiting for Spark job to complete");
        /* Wait for job completion */
        boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
        if (!completed) {
            spark.destroyForcibly();
            getLog().error("Spark process timed out after {} seconds using flow file: {}  ", new Object[] { sparkProcessTimeout, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        int exitCode = spark.exitValue();
        flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, Integer.toString(exitCode));
        if (exitCode != 0) {
            logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ", new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ", new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
        flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: InputStreamReaderRunnable (com.thinkbiganalytics.nifi.util.InputStreamReaderRunnable), FlowFile (org.apache.nifi.flowfile.FlowFile), ApplySecurityPolicy (com.thinkbiganalytics.nifi.security.ApplySecurityPolicy), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), ProcessException (org.apache.nifi.processor.exception.ProcessException), JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException), MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService), SparkLauncher (org.apache.spark.launcher.SparkLauncher)
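
Two details of this onTrigger are worth calling out: both of the child's streams are drained on dedicated threads (a child process whose stdout or stderr is never read can block once the OS pipe buffers fill), and the wait is bounded, with destroyForcibly() on timeout. A self-contained sketch of that launch pattern, using ProcessBuilder and a stand-in echo command instead of SparkLauncher:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.TimeUnit;

public class ChildProcessPatternSketch {

    /** Consume a stream line by line on its own thread so the child never blocks on a full pipe. */
    private static Thread drain(InputStream in, String name) {
        Thread t = new Thread(() -> {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println("[" + name + "] " + line);
                }
            } catch (Exception ignored) {
                // the stream closes when the child exits
            }
        }, name);
        t.start();
        return t;
    }

    public static void main(String[] args) throws Exception {
        // Stand-in for SparkLauncher.launch(); "echo" assumes a Unix-like host.
        Process child = new ProcessBuilder("echo", "hello").start();
        drain(child.getInputStream(), "stream input");
        drain(child.getErrorStream(), "stream error");

        // Bounded wait with a forced kill on expiry, mirroring the processor above.
        boolean completed = child.waitFor(60, TimeUnit.SECONDS);
        if (!completed) {
            child.destroyForcibly();
            System.err.println("child process timed out");
            return;
        }
        System.out.println("exit code: " + child.exitValue());
    }
}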

Example 13 with MetadataProviderService

Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.

From the class PutFeedMetadataTest, method setUp:

@Before
public void setUp() throws Exception {
    // Setup services
    final MetadataProviderService metadataService = new MockMetadataProviderService();
    // Setup test runner
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(TriggerCleanup.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
}
Also used: MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService), Before (org.junit.Before)
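
The wiring in this setUp (addControllerService, enableControllerService, setProperty) is the standard NiFi TestRunner pattern for any controller service. A generic sketch of the same shape with a hypothetical service interface and a no-op host processor; GreetingService and all class names here are illustrative stand-ins, not kylo's:

import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ControllerService;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class ControllerServiceWiringSketch {

    /** Hypothetical service contract standing in for MetadataProviderService. */
    public interface GreetingService extends ControllerService {
        String greet();
    }

    /** Hypothetical mock, analogous to MockMetadataProviderService. */
    public static class MockGreetingService extends AbstractControllerService implements GreetingService {
        @Override
        public String greet() {
            return "hello";
        }
    }

    /** Processor that does nothing; it only hosts the test runner. */
    public static class NoOpProcessor extends AbstractProcessor {
        @Override
        public void onTrigger(ProcessContext context, ProcessSession session) {
            // no-op
        }
    }

    public static void main(String[] args) throws Exception {
        final TestRunner runner = TestRunners.newTestRunner(NoOpProcessor.class);
        final GreetingService service = new MockGreetingService();
        // Same wiring as the setUp method above: register, enable, reference by identifier.
        runner.addControllerService("mock-service-id", service);
        runner.enableControllerService(service);
        // A real processor would expose a PropertyDescriptor the identifier is assigned to,
        // e.g. runner.setProperty(SomeProcessor.METADATA_SERVICE, "mock-service-id").
    }
}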

Example 14 with MetadataProviderService

Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.

From the class GetFeedsHistoryReindexTest, method testNoFeedsToReindex:

@Test
public void testNoFeedsToReindex() throws Exception {
    final String METADATA_SERVICE_IDENTIFIER = "MockMetadataProviderService_NoFeedsToIndex";
    final TestRunner runner = TestRunners.newTestRunner(GetFeedsHistoryReindex.class);
    final MetadataProviderService metadataService = new MockMetadataProviderService_NoFeedsToReindex();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(GetFeedsHistoryReindex.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.run(1);
    runner.assertQueueEmpty();
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_FOUND, 0);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_NOT_FOUND, 1);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_FAILURE, 0);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_ORIGINAL, 0);
    List<MockFlowFile> results = runner.getFlowFilesForRelationship(GetFeedsHistoryReindex.REL_NOT_FOUND);
    MockFlowFile resultFlowFile = results.get(0);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeEquals(GetFeedsHistoryReindex.FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY, String.valueOf(0));
    resultFlowFile.assertAttributeEquals(GetFeedsHistoryReindex.FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY, "[]");
    resultFlowFile.assertAttributeNotEquals(GetFeedsHistoryReindex.FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY, null);
}
Also used: MockFlowFile (org.apache.nifi.util.MockFlowFile), TestRunner (org.apache.nifi.util.TestRunner), MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService), Test (org.junit.Test)
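
For reference, a plain-Java sketch of how the three asserted attributes could be produced when no feeds are found. The attribute keys are hypothetical stand-ins for the GetFeedsHistoryReindex constants, not kylo's actual key strings:

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReindexAttributesSketch {

    /** Builds count, id-list, and timestamp attributes for a list of feed ids. */
    static Map<String, String> reindexAttributes(List<String> feedIds) {
        Map<String, String> attrs = new HashMap<>();
        // "0" for an empty list, matching the count assertion above
        attrs.put("feeds.total.count.for.history.reindex", String.valueOf(feedIds.size()));
        // List.toString() renders an empty list as "[]", matching the ids assertion
        attrs.put("feeds.total.ids.for.history.reindex", feedIds.toString());
        // A non-null UTC timestamp, matching the assertAttributeNotEquals(..., null) check
        attrs.put("feeds.check.time.utc.for.history.reindex",
                ZonedDateTime.now(ZoneOffset.UTC).toString());
        return attrs;
    }

    public static void main(String[] args) {
        System.out.println(reindexAttributes(Collections.<String>emptyList()));
    }
}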

Example 15 with MetadataProviderService

Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.

From the class UpdateFeedHistoryReindexTest, method setFeedStatus_ValidStateInProgress:

@Test
public void setFeedStatus_ValidStateInProgress() throws Exception {
    final String METADATA_SERVICE_IDENTIFIER = "MockMetadataProviderService_Minimal";
    final TestRunner runner = TestRunners.newTestRunner(UpdateFeedHistoryReindex.class);
    final MetadataProviderService metadataService = new MockMetadataProviderService_Minimal();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(UpdateFeedHistoryReindex.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.setProperty(UpdateFeedHistoryReindex.FEED_ID, "feed-0-id");
    runner.setProperty(UpdateFeedHistoryReindex.FEED_REINDEX_STATUS, "IN_PROGRESS");
    runner.assertValid();
}
Also used: TestRunner (org.apache.nifi.util.TestRunner), Matchers.anyString (org.mockito.Matchers.anyString), MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService), Test (org.junit.Test)
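
Accepting IN_PROGRESS here suggests the property is validated against a fixed set of reindexing states (kylo models these with HistoryReindexingStatus, per the aggregations below). A minimal sketch of such a check with a hypothetical enum whose member names are illustrative guesses:

public class ReindexStatusCheckSketch {

    /** Hypothetical status enum; member names are illustrative guesses, not kylo's. */
    enum ReindexStatus { DIRTY, IN_PROGRESS, COMPLETED_WITH_SUCCESS, COMPLETED_WITH_FAILURE }

    /** True when the value names a member of the enum, as "IN_PROGRESS" does. */
    static boolean isValidStatus(String value) {
        try {
            ReindexStatus.valueOf(value);
            return true;
        } catch (IllegalArgumentException e) {
            return false;
        }
    }

    public static void main(String[] args) {
        System.out.println(isValidStatus("IN_PROGRESS")); // true
        System.out.println(isValidStatus("NOT_A_STATE")); // false
    }
}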

Aggregations

MetadataProviderService (com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService): 21
Test (org.junit.Test): 14
TestRunner (org.apache.nifi.util.TestRunner): 13
Matchers.anyString (org.mockito.Matchers.anyString): 9
MockFlowFile (org.apache.nifi.util.MockFlowFile): 5
FlowFile (org.apache.nifi.flowfile.FlowFile): 4
ComponentLog (org.apache.nifi.logging.ComponentLog): 4
ProcessException (org.apache.nifi.processor.exception.ProcessException): 3
Before (org.junit.Before): 3
IOException (java.io.IOException): 2
DBCPService (org.apache.nifi.dbcp.DBCPService): 2
JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException): 1
GetTableDataSupport (com.thinkbiganalytics.ingest.GetTableDataSupport): 1
Feed (com.thinkbiganalytics.metadata.rest.model.feed.Feed): 1
FeedDataHistoryReindexParams (com.thinkbiganalytics.metadata.rest.model.feed.reindex.FeedDataHistoryReindexParams): 1
FeedsForDataHistoryReindex (com.thinkbiganalytics.metadata.rest.model.feed.reindex.FeedsForDataHistoryReindex): 1
HistoryReindexingStatus (com.thinkbiganalytics.metadata.rest.model.feed.reindex.HistoryReindexingStatus): 1
CleanupEventService (com.thinkbiganalytics.nifi.core.api.cleanup.CleanupEventService): 1
ApplySecurityPolicy (com.thinkbiganalytics.nifi.security.ApplySecurityPolicy): 1
InputStreamReaderRunnable (com.thinkbiganalytics.nifi.util.InputStreamReaderRunnable): 1