Use of org.apache.spark.SparkContext$ in project kylo by Teradata.
The class Validator, method run.
private void run(@Nonnull final PrintStream out, @Nonnull final String... args) {
    // Check how many arguments were passed in
    if (args.length < 4) {
        String msg = "Proper Usage is: <targetDatabase> <entity> <partition> <path-to-policy-file>\n"
                     + "You can optionally add: --hiveConf hive.setting=value --hiveConf hive.other.setting=value\n"
                     + "You can optionally add: --storageLevel rdd_persistence_level_value\n"
                     + "You can optionally add: --numPartitions number_of_rdd_partitions\n"
                     + "You provided " + args.length + " args which are (comma separated): " + StringUtils.join(args, ",");
        out.println(msg);
        throw new IllegalArgumentException(msg);
    }
    final SparkContext sparkContext = SparkContext.getOrCreate();
    try {
        final ValidatorConfiguration params = new ValidatorConfiguration(args);
        // Initialize Spring context
        try (final ConfigurableApplicationContext ctx = new AnnotationConfigApplicationContext("com.thinkbiganalytics.spark")) {
            final DataValidator app = ctx.getBean(DataValidator.class);
            // Prepare Hive context
            final HiveContext hiveContext = new HiveContext(sparkContext);
            for (final Param param : params.getHiveParams()) {
                log.info("Adding Hive parameter {}={}", param.getName(), param.getValue());
                hiveContext.setConf(param.getName(), param.getValue());
            }
            log.info("Deployment Mode - {}", hiveContext.sparkContext().getConf().get("spark.submit.deployMode"));
            Map<String, FieldPolicy> policyMap = ctx.getBean(FieldPolicyLoader.class).loadFieldPolicy(params.getFieldPolicyJsonPath());
            // Run validation
            final DataValidatorResult results = app.validateTable(params.getTargetDatabase(), params.getFeedTableName(), params.getValidTableName(),
                                                                  params.getPartition(), params.getNumPartitions(), policyMap, hiveContext);
            log.info("Persistence level: {}", params.getStorageLevel());
            results.persist(StorageLevel.fromString(params.getStorageLevel()));
            app.saveInvalidToTable(params.getTargetDatabase(), params.getFeedTableName(), params.getInvalidTableName(), results, hiveContext);
            app.saveValidToTable(params.getTargetDatabase(), params.getFeedTableName(), params.getValidTableName(), results, hiveContext);
            app.saveProfileToTable(params.getTargetDatabase(), params.getProfileTableName(), params.getPartition(), results, hiveContext);
            results.unpersist();
        }
        log.info("Validator app finished");
    } catch (Exception e) {
        log.error("Failed to perform validation: {}", e.toString(), e);
        throw e;
    }
}
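The four required positional arguments are the target database, the entity (feed table) name, the partition value, and the path to the field-policy JSON file, followed by the optional flags parsed by ValidatorConfiguration. A minimal sketch of such an argument array is shown below; all of the concrete values are hypothetical and for illustration only.

    // Hypothetical argument values, for illustration only
    final String[] exampleArgs = {
        "userdata_db",                                    // <targetDatabase>
        "userdata",                                       // <entity>
        "20180101000000",                                 // <partition>
        "/tmp/userdata_field_policy.json",                // <path-to-policy-file>
        "--hiveConf", "hive.exec.dynamic.partition=true", // optional Hive setting
        "--storageLevel", "MEMORY_AND_DISK",              // optional RDD persistence level
        "--numPartitions", "8"                            // optional RDD partition count
    };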
Use of org.apache.spark.SparkContext$ in project kylo by Teradata.
The class DataSourceResourceLoaderTest, method addJar.
/**
* Verify adding a jar to the Spark context.
*/
@Test
public void addJar() {
    // Mock Spark Context
    final SparkContext sparkContext = Mockito.mock(SparkContext.class);
    Mockito.when(sparkContext.hadoopConfiguration()).thenReturn(new Configuration(false));
    // Test adding local:/ jar
    final DataSourceResourceLoader loader = DataSourceResourceLoader.create(sparkContext);
    final String jarUrl = getClass().getResource("./").toString();
    loader.addJar(jarUrl.replace("file:", "local:"));
    Mockito.verify(sparkContext, Mockito.times(1)).addJar(jarUrl);
    Assert.assertNotNull(loader.getResource("DataSourceResourceLoaderTest.class"));
}
Use of org.apache.spark.SparkContext$ in project kylo by Teradata.
The class MultiSparkExecApp, method run.
private void run(@Nonnull final PrintStream out, @Nonnull final String... args) {
    log.info("MultiSparkExecApp running...");
    final SparkContext sparkContext = SparkContext.getOrCreate();
    try {
        final MultiSparkExecArguments sparkExecArgs = new MultiSparkExecArguments(args);
        final List<SparkApplicationCommand> commands = sparkExecArgs.getCommands();
        final List<Class<?>> appClasses = new ArrayList<>(sparkExecArgs.getCommands().size());
        // Get the list of all app classes, verifying that each has a main() method.
        for (SparkApplicationCommand cmd : sparkExecArgs.getCommands()) {
            appClasses.add(getApplicationClasses(cmd));
        }
        log.debug("Preparing to execute apps: {}", appClasses);
        for (int idx = 0; idx < appClasses.size(); idx++) {
            Class<?> appClass = appClasses.get(idx);
            SparkApplicationCommand cmd = commands.get(idx);
            System.out.println(">>> Beginning: " + cmd.getName() + " *****************************************************");
            executeApp(appClass, cmd);
            System.out.println("<<< Completed: " + cmd.getName() + " *****************************************************");
            // TODO Generate provenance events.
        }
        log.info("MultiSparkExecApp finished");
    } catch (Exception e) {
        log.error("Execution failed", e);
        throw e;
    } finally {
        sparkContext.stop();
    }
}
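The executeApp helper is not part of this snippet. A minimal sketch of what it might do, assuming it invokes each application's public static main(String[]) reflectively and that SparkApplicationCommand exposes its argument array through a getArgs()-style accessor (both are assumptions, not confirmed by the code above):

    // Hypothetical sketch: invoke the application's main(String[]) via reflection.
    // The cmd.getArgs() accessor is assumed here and may differ in the real class.
    private void executeApp(final Class<?> appClass, final SparkApplicationCommand cmd) throws Exception {
        final java.lang.reflect.Method main = appClass.getMethod("main", String[].class);
        main.invoke(null, (Object) cmd.getArgs());
    }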
Use of org.apache.spark.SparkContext$ in project kylo by Teradata.
The class SpringTestConfigV1, method sqlContext.
@Bean
public SQLContext sqlContext() {
    final SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Profiler Test");
    final SparkContext sc = new SparkContext(conf);
    return new SQLContext(sc);
}
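The bean builds a local-mode SparkContext so the profiler tests can run without a cluster. A minimal, hypothetical sketch of a Spring-driven test consuming this bean (the test class and method names are assumptions, not taken from kylo):

    @RunWith(SpringJUnit4ClassRunner.class)
    @ContextConfiguration(classes = SpringTestConfigV1.class)
    public class SqlContextWiringTest {

        // Injected from the sqlContext() bean defined above
        @Autowired
        private SQLContext sqlContext;

        @Test
        public void sqlContextIsWired() {
            Assert.assertNotNull(sqlContext);
            // The bean configures a local master, so the wired context should reflect it
            Assert.assertEquals("local[*]", sqlContext.sparkContext().master());
        }
    }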
Use of org.apache.spark.SparkContext$ in project kylo by Teradata.
The class SparkScriptEngine, method executeWithSparkClassLoader.
/**
* Executes the specified callable after replacing the current context class loader.
*
* <p>This is a work-around to avoid {@link ClassCastException} issues caused by conflicts between Hadoop and Kylo Spark Shell. Spark uses the context class loader when loading Hadoop components
* for running Spark on YARN. When both Hadoop and Kylo Spark Shell provide the same class then both classes are loaded when creating a {@link SparkContext}. The fix is to set the context class
* loader to the same class loader that was used to load the {@link SparkContext} class.</p>
*
* @param callable the function to be executed
* @param <T> the return type
* @return the return value
*/
private <T> T executeWithSparkClassLoader(@Nonnull final Callable<T> callable) {
    // Set context class loader
    final Thread currentThread = Thread.currentThread();
    final ClassLoader contextClassLoader = currentThread.getContextClassLoader();
    final ClassLoader sparkClassLoader = new ForwardingClassLoader(SparkContext.class.getClassLoader(), getClassLoader());
    currentThread.setContextClassLoader(sparkClassLoader);
    // Execute callable
    try {
        return callable.call();
    } catch (final Exception e) {
        throw Throwables.propagate(e);
    } finally {
        // Reset context class loader
        currentThread.setContextClassLoader(contextClassLoader);
    }
}
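A minimal sketch of how this helper might be called from within the enclosing class, assuming a hypothetical createSparkContext() factory method that is not part of the snippet above:

    // Hypothetical caller: the SparkContext is constructed while the Spark class
    // loader is the thread's context class loader, so Hadoop/YARN classes resolve
    // to a single definition and the ClassCastException described above is avoided.
    final SparkContext context = executeWithSparkClassLoader(new Callable<SparkContext>() {
        @Override
        public SparkContext call() {
            return createSparkContext(); // assumed factory method, not part of the snippet
        }
    });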