Use of com.thinkbiganalytics.spark.shell.CatalogDataSetProvider in project kylo by Teradata.
From the class TransformService, method createShellTask:
/**
 * Creates a new Spark shell transformation.
 */
@Nonnull
private DataSet createShellTask(@Nonnull final TransformRequest request) throws ScriptException {
    log.entry(request);

    // Build bindings list
    final List<NamedParam> bindings = new ArrayList<>();
    bindings.add(new NamedParamClass("sparkContextService", SparkContextService.class.getName(), sparkContextService));

    // Bind a data source provider if the request references any data sources
    if ((request.getDatasources() != null && !request.getDatasources().isEmpty()) || (request.getCatalogDataSources() != null && !request.getCatalogDataSources().isEmpty())) {
        if (datasourceProviderFactory != null) {
            final List<Datasource> legacyDataSources = request.getDatasources() != null ? request.getDatasources() : new ArrayList<Datasource>();
            final List<DataSource> catalogDataSources = request.getCatalogDataSources() != null ? request.getCatalogDataSources() : new ArrayList<DataSource>();
            final DatasourceProvider datasourceProvider = datasourceProviderFactory.getDatasourceProvider(legacyDataSources, catalogDataSources);
            bindings.add(new NamedParamClass("datasourceProvider", DatasourceProvider.class.getName() + "[org.apache.spark.sql.DataFrame]", datasourceProvider));
        } else {
            throw log.throwing(new ScriptException("Script cannot be executed because no data source provider factory is available."));
        }
    }

    // Bind a catalog data set provider if the request references any data sets
    if (request.getCatalogDatasets() != null && !request.getCatalogDatasets().isEmpty()) {
        if (catalogDataSetProviderFactory != null) {
            log.info("Creating new shell task with {} data sets", request.getCatalogDatasets().size());
            final CatalogDataSetProvider catalogDataSetProvider = catalogDataSetProviderFactory.getDataSetProvider(request.getCatalogDatasets());
            bindings.add(new NamedParamClass("catalogDataSetProvider", CatalogDataSetProvider.class.getName() + "[org.apache.spark.sql.DataFrame]", catalogDataSetProvider));
        } else {
            throw log.throwing(new ScriptException("Script cannot be executed because no catalog data set provider factory is available."));
        }
    }

    // Ensure SessionState is valid
    if (SessionState.get() == null && sessionState != null) {
        SessionState.setCurrentSessionState(sessionState);
    }

    // Execute script
    final Object result;
    try {
        result = this.engine.eval(toTransformScript(request), bindings);
    } catch (final Exception cause) {
        throw log.throwing(new ScriptException(cause));
    }

    if (result instanceof DataSet) {
        return log.exit((DataSet) result);
    } else {
        throw log.throwing(new IllegalStateException("Unexpected script result type: " + (result != null ? result.getClass() : null)));
    }
}
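For context, toTransformScript(request) produces Scala source that the engine evaluates with the bindings above in scope; type strings such as DatasourceProvider.class.getName() + "[org.apache.spark.sql.DataFrame]" tell the Scala interpreter the generic parameter of each binding. A minimal sketch of such a script follows; the accessor read(...), the conversion toDataSet(...), and the id "my-dataset-id" are illustrative assumptions, not confirmed Kylo API:

// Sketch of a generated transform script (Scala). The names
// "catalogDataSetProvider" and "sparkContextService" resolve to the
// NamedParam bindings registered in createShellTask.
import org.apache.spark.sql.DataFrame

// Look up a bound data set by id (method name and id are assumptions)
val df: DataFrame = catalogDataSetProvider.read("my-dataset-id")

// Apply an example transformation
val transformed = df.filter("rating > 3").select("id", "rating")

// Convert the DataFrame to a Kylo DataSet so the instanceof check in
// createShellTask passes (toDataSet is an assumed SparkContextService method)
sparkContextService.toDataSet(transformed)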
Use of com.thinkbiganalytics.spark.shell.CatalogDataSetProvider in project kylo by Teradata.
From the class App, method main:
/**
 * Evaluates a Scala file.
 *
 * @param args the command-line arguments
 * @throws Exception if an error occurs
 */
public static void main(@Nonnull String[] args) throws Exception {
    // Verify arguments
    if (args.length != 1) {
        System.err.println("error: usage: SparkShellApp file");
        System.exit(1);
    }

    // Load environment
    final AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
    ctx.register(SecurityCoreConfig.class);
    ctx.scan("com.thinkbiganalytics.spark", "com.thinkbiganalytics.kylo.catalog");
    ctx.refresh();

    // Locate the script file, falling back to the working directory
    File scriptFile = new File(args[0]);
    if (scriptFile.exists() && scriptFile.isFile()) {
        log.info("Loading script file at {}", args[0]);
    } else {
        log.info("Couldn't find script file at {}; will check the working directory.", args[0]);
        scriptFile = new File("./" + scriptFile.getName());
    }
    final String script = Files.toString(scriptFile, Charsets.UTF_8);

    // Prepare bindings
    final List<NamedParam> bindings = new ArrayList<>();
    final DatasourceProvider datasourceProvider = ctx.getBean(DatasourceProvider.class);
    bindings.add(new NamedParamClass("datasourceProvider", datasourceProvider.getClass().getName(), datasourceProvider));
    final CatalogDataSetProvider catalogDataSetProvider = ctx.getBean(CatalogDataSetProvider.class);
    bindings.add(new NamedParamClass("catalogDataSetProvider", catalogDataSetProvider.getClass().getName(), catalogDataSetProvider));

    // Execute script
    final SparkScriptEngine engine = ctx.getBean(SparkScriptEngine.class);
    engine.eval(script, bindings);
}
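A script file passed to main might look like the following sketch; as above, the read(...) accessor and the identifier "example-id" are placeholders for illustration rather than confirmed API:

// example.scala -- evaluated by SparkScriptEngine with "datasourceProvider"
// and "catalogDataSetProvider" bound from the Spring context
import org.apache.spark.sql.DataFrame

// Fetch a data set through the bound provider (assumed accessor, placeholder id)
val df: DataFrame = catalogDataSetProvider.read("example-id")

// A simple action to force evaluation of the data set
println(s"Row count: ${df.count()}")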