Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.
The class ExecuteSparkJobTest, method testValidatorsForDatasources.
/**
* Verify validators for Data Sources property.
*/
@Test
public void testValidatorsForDatasources() throws Exception {
    // Test UUID list validator
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "INVALID");
    runner.enqueue(new byte[0]);
    Set<String> results = ((MockProcessContext) runner.getProcessContext()).validate().stream().map(Object::toString).collect(Collectors.toSet());
    Assert.assertEquals(1, results.size());
    Assert.assertTrue(results.contains("'Data Sources' validated against 'INVALID' is invalid because not a list of UUIDs"));

    // Test missing metadata service
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "87870c7e-8ae8-4db4-9959-c2f5a9496833");
    runner.enqueue(new byte[0]);
    results = ((MockProcessContext) runner.getProcessContext()).validate().stream().map(Object::toString).collect(Collectors.toSet());
    Assert.assertEquals(1, results.size());
    Assert.assertTrue(results.contains("'Metadata Service' is invalid because Metadata Service is required when Data Sources is not empty"));

    // Test with one UUID
    final MetadataProviderService metadataService = new MockMetadataProviderService();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(ExecuteSparkJob.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.enqueue(new byte[0]);
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());

    // Test with two UUIDs
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "87870c7e-8ae8-4db4-9959-c2f5a9496833,e4562514-8e06-459a-8ea9-1e2630c852f9");
    runner.enqueue(new byte[0]);
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());

    // Test with expression
    runner.setProperty(ExecuteSparkJob.DATASOURCES, "${metadata.dataTransformation.datasourceIds}");
    runner.enqueue(new byte[0], Collections.singletonMap("metadata.dataTransformation.datasourceIds", "87870c7e-8ae8-4db4-9959-c2f5a9496833"));
    Assert.assertEquals(0, ((MockProcessContext) runner.getProcessContext()).validate().size());
}
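Note: the "not a list of UUIDs" failure above comes from a validator attached to the Data Sources property. As a hedged illustration only (the class name, the Expression Language handling, and the exact error wording below are assumptions, not the actual Kylo implementation), such a UUID-list check can be written against NiFi's Validator interface:

import java.util.UUID;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;

// Hypothetical validator: accepts a comma-separated list of UUIDs, defers when
// Expression Language is present, and rejects anything else.
public class UuidListValidator implements Validator {

    @Override
    public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
        // Defer validation when the value still contains an unevaluated expression.
        if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(input)) {
            return new ValidationResult.Builder().subject(subject).input(input).valid(true)
                    .explanation("Expression Language present").build();
        }
        if (input == null || input.trim().isEmpty()) {
            return new ValidationResult.Builder().subject(subject).input(input).valid(false)
                    .explanation("not a list of UUIDs").build();
        }
        try {
            // Every comma-separated token must parse as a UUID.
            for (final String id : input.split(",")) {
                UUID.fromString(id.trim());
            }
            return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
        } catch (final IllegalArgumentException e) {
            return new ValidationResult.Builder().subject(subject).input(input).valid(false)
                    .explanation("not a list of UUIDs").build();
        }
    }
}

Letting an unevaluated ${...} expression pass is what allows the final part of the test to supply the UUID through a FlowFile attribute instead of a literal value.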
Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.
The class ExecuteSparkJob, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String PROVENANCE_JOB_STATUS_KEY = "Job Status";
    String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";
    try {
        PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
        PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";

        /* Configuration parameters for spark launcher */
        String appJar = context.getProperty(APPLICATION_JAR).evaluateAttributeExpressions(flowFile).getValue().trim();
        String extraJars = context.getProperty(EXTRA_JARS).evaluateAttributeExpressions(flowFile).getValue();
        String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        String mainClass = context.getProperty(MAIN_CLASS).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        String appArgs = context.getProperty(MAIN_ARGS).evaluateAttributeExpressions(flowFile).getValue().trim();
        String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile).getValue();
        String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME).evaluateAttributeExpressions(flowFile).getValue();
        String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        String principal = context.getProperty(kerberosPrincipal).getValue();
        String keyTab = context.getProperty(kerberosKeyTab).getValue();
        String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
        String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile).getValue();
        Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT).evaluateAttributeExpressions(flowFile).asTimePeriod(TimeUnit.SECONDS).intValue();
        String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE).asControllerService(MetadataProviderService.class);
        final List<String> extraJarPaths = getExtraJarPaths(extraJars);

        // If all 3 fields are filled out then assume kerberos is enabled, and user should be authenticated
        boolean isAuthenticated = !StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab) && !StringUtils.isEmpty(hadoopConfigurationResources);
        try {
            if (isAuthenticated && isSecurityEnabled(hadoopConfigurationResources)) {
                logger.info("Security is enabled");
                if (principal.equals("") && keyTab.equals("")) {
                    logger.error("Kerberos Principal and Kerberos KeyTab information missing in Kerberos enabled cluster. {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
                logger.info("User authentication initiated");
                boolean authenticationStatus = new ApplySecurityPolicy().validateUserWithKerberos(logger, hadoopConfigurationResources, principal, keyTab);
                if (authenticationStatus) {
                    logger.info("User authenticated successfully.");
                } else {
                    logger.error("User authentication failed. {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (IOException e1) {
            logger.error("IOException occurred while authenticating user: {} and flow file: {}", new Object[] { e1.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (Exception unknownException) {
            logger.error("Unknown exception occurred while validating user: {}. {} ", new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();

        // Build environment
        final Map<String, String> env = getDatasources(session, flowFile, PROVENANCE_JOB_STATUS_KEY, datasourceIds, metadataService, extraJarPaths);
        if (env == null) {
            return;
        }

        /* Launch the spark job as a child process */
        SparkLauncher launcher = new SparkLauncher(env)
                .setAppResource(appJar)
                .setMainClass(mainClass)
                .setMaster(sparkMaster)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout)
                .setSparkHome(sparkHome)
                .setAppName(sparkApplicationName);
        OptionalSparkConfigurator optionalSparkConf = new OptionalSparkConfigurator(launcher)
                .setDeployMode(sparkMaster, sparkYarnDeployMode)
                .setAuthentication(isAuthenticated, keyTab, principal)
                .addAppArgs(appArgs)
                .addSparkArg(sparkConfs)
                .addExtraJars(extraJarPaths)
                .setYarnQueue(yarnQueue)
                .setExtraFiles(extraFiles);
        Process spark = optionalSparkConf.getLaucnher().launch();

        /* Read/clear the process input stream */
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        /* Read/clear the process error stream */
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for Spark job to complete");

        /* Wait for job completion */
        boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
        if (!completed) {
            spark.destroyForcibly();
            getLog().error("Spark process timed out after {} seconds using flow file: {} ", new Object[] { sparkProcessTimeout, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        int exitCode = spark.exitValue();
        flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, Integer.toString(exitCode));
        if (exitCode != 0) {
            logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ", new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ", new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
        flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    }
}
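The InputStreamReaderRunnable threads above exist so the child Spark process cannot block once its stdout or stderr pipe buffer fills up. The Kylo class itself is not shown in this listing; the following is only a minimal sketch of that drain-and-log pattern, assuming a constructor matching the (LogLevel, ComponentLog, InputStream) call sites above:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.logging.LogLevel;

// Illustrative stream drainer: reads a process stream line by line and forwards it to the processor log.
public class InputStreamReaderRunnable implements Runnable {

    private final LogLevel level;
    private final ComponentLog logger;
    private final InputStream stream;

    public InputStreamReaderRunnable(final LogLevel level, final ComponentLog logger, final InputStream stream) {
        this.level = level;
        this.logger = logger;
        this.stream = stream;
    }

    @Override
    public void run() {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Log each line of Spark output at roughly the requested level.
                if (level == LogLevel.ERROR) {
                    logger.error(line);
                } else {
                    logger.info(line);
                }
            }
        } catch (final IOException e) {
            logger.warn("Failed to read Spark process output: " + e.getMessage());
        }
    }
}

Because both streams are drained on background threads, spark.waitFor(...) can return normally or time out without the child process's output ever backing up.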
Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.
The class PutFeedMetadataTest, method setUp.
@Before
public void setUp() throws Exception {
    // Setup services
    final MetadataProviderService metadataService = new MockMetadataProviderService();

    // Setup test runner
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(TriggerCleanup.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
}
Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.
The class GetFeedsHistoryReindexTest, method testNoFeedsToReindex.
@Test
public void testNoFeedsToReindex() throws Exception {
    final String METADATA_SERVICE_IDENTIFIER = "MockMetadataProviderService_NoFeedsToIndex";
    final TestRunner runner = TestRunners.newTestRunner(GetFeedsHistoryReindex.class);
    final MetadataProviderService metadataService = new MockMetadataProviderService_NoFeedsToReindex();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(GetFeedsHistoryReindex.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.run(1);

    runner.assertQueueEmpty();
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_FOUND, 0);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_NOT_FOUND, 1);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_FAILURE, 0);
    runner.assertTransferCount(GetFeedsHistoryReindex.REL_ORIGINAL, 0);

    List<MockFlowFile> results = runner.getFlowFilesForRelationship(GetFeedsHistoryReindex.REL_NOT_FOUND);
    MockFlowFile resultFlowFile = results.get(0);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeExists(GetFeedsHistoryReindex.FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY);
    resultFlowFile.assertAttributeEquals(GetFeedsHistoryReindex.FEEDS_TOTAL_COUNT_FOR_HISTORY_REINDEX_KEY, String.valueOf(0));
    resultFlowFile.assertAttributeEquals(GetFeedsHistoryReindex.FEEDS_TOTAL_IDS_FOR_HISTORY_REINDEX_KEY, "[]");
    resultFlowFile.assertAttributeNotEquals(GetFeedsHistoryReindex.FEEDS_CHECK_TIME_UTC_FOR_HISTORY_REINDEX_KEY, null);
}
Use of com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService in project kylo by Teradata.
The class UpdateFeedHistoryReindexTest, method setFeedStatus_ValidStateInProgress.
@Test
public void setFeedStatus_ValidStateInProgress() throws Exception {
    final String METADATA_SERVICE_IDENTIFIER = "MockMetadataProviderService_Minimal";
    final TestRunner runner = TestRunners.newTestRunner(UpdateFeedHistoryReindex.class);
    final MetadataProviderService metadataService = new MockMetadataProviderService_Minimal();
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(metadataService);
    runner.setProperty(UpdateFeedHistoryReindex.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.setProperty(UpdateFeedHistoryReindex.FEED_ID, "feed-0-id");
    runner.setProperty(UpdateFeedHistoryReindex.FEED_REINDEX_STATUS, "IN_PROGRESS");
    runner.assertValid();
}
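runner.assertValid() succeeds here because IN_PROGRESS is an accepted value for the feed reindex status property. In NiFi this kind of constraint is usually declared with allowableValues on the PropertyDescriptor; the sketch below is an assumption for illustration (the property name, description, and full value set are not taken from the Kylo source):

import org.apache.nifi.components.PropertyDescriptor;

// Hypothetical descriptor restricting the reindex status to a fixed set of values.
public static final PropertyDescriptor FEED_REINDEX_STATUS = new PropertyDescriptor.Builder()
        .name("Feed reindex status")
        .description("New history reindex status to set on the feed")
        .allowableValues("DIRTY", "IN_PROGRESS", "COMPLETED", "FAILED")
        .required(true)
        .build();

With allowableValues in place, the standard framework validation rejects any other value, so the test only needs to set the property and assert that the processor configuration is valid.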