Use of com.thinkbiganalytics.spark.shell.DatasourceProvider in project kylo by Teradata.
The class TransformServiceTest, method executeWithDatasourceProviderFactory.
/**
 * Verify executing a transformation request with a data source provider factory.
 */
@Test
@SuppressWarnings("unchecked")
public void executeWithDatasourceProviderFactory() throws Exception {
    // Mock data set
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());

    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);

    // Mock Spark script engine
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));

    // Mock data source provider factory
    final DatasourceProvider datasourceProvider = Mockito.mock(DatasourceProvider.class);
    final DatasourceProviderFactory datasourceProviderFactory = Mockito.mock(DatasourceProviderFactory.class);
    Mockito.when(datasourceProviderFactory.getDatasourceProvider(Mockito.anyCollectionOf(Datasource.class), Mockito.anyCollectionOf(DataSource.class))).thenReturn(datasourceProvider);

    // Mock profiler
    final Profiler profiler = Mockito.mock(Profiler.class);

    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDoProfile(true);
    request.setDatasources(Collections.singletonList(Mockito.mock(Datasource.class)));
    request.setScript("sqlContext.range(1,10)");

    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService(), Mockito.mock(DataSetConverterService.class), Mockito.mock(KyloCatalogClientBuilder.class));
    service.setDatasourceProviderFactory(datasourceProviderFactory);
    service.setProfiler(profiler);

    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());

    // Test eval arguments
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    final String expectedScript;
    try (InputStream inputStream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        expectedScript = IOUtils.toString(inputStream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(2, bindings.size());
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
    Assert.assertEquals("datasourceProvider", bindings.get(1).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.shell.DatasourceProvider[org.apache.spark.sql.DataFrame]", bindings.get(1).tpe());
    Assert.assertEquals(datasourceProvider, bindings.get(1).value());
}
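
The binding assertions above rely on Mockito's ArgumentCaptor to pull the arguments out of the mocked engine.eval call after the fact. Below is a minimal, self-contained sketch of that capture-then-verify pattern, independent of kylo; the List mock and the "hello" value are illustrative only.

import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;

public class ArgumentCaptorSketchTest {

    @Test
    @SuppressWarnings("unchecked")
    public void capturesArguments() {
        // Invoke a method on a mocked collaborator
        final List<String> mock = Mockito.mock(List.class);
        mock.add("hello");

        // Capture the argument the mock received, then assert on it
        final ArgumentCaptor<String> captor = ArgumentCaptor.forClass(String.class);
        Mockito.verify(mock).add(captor.capture());
        Assert.assertEquals("hello", captor.getValue());
    }
}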
Use of com.thinkbiganalytics.spark.shell.DatasourceProvider in project kylo by Teradata.
The class TransformService, method createShellTask.
/**
 * Creates a new Spark shell transformation.
 */
@Nonnull
private DataSet createShellTask(@Nonnull final TransformRequest request) throws ScriptException {
    log.entry(request);

    // Build bindings list
    final List<NamedParam> bindings = new ArrayList<>();
    bindings.add(new NamedParamClass("sparkContextService", SparkContextService.class.getName(), sparkContextService));

    if ((request.getDatasources() != null && !request.getDatasources().isEmpty()) || (request.getCatalogDataSources() != null && !request.getCatalogDataSources().isEmpty())) {
        if (datasourceProviderFactory != null) {
            List<Datasource> legacyDataSources = request.getDatasources() != null ? request.getDatasources() : new ArrayList<Datasource>();
            List<DataSource> catalogDataSources = request.getCatalogDataSources() != null ? request.getCatalogDataSources() : new ArrayList<DataSource>();
            final DatasourceProvider datasourceProvider = datasourceProviderFactory.getDatasourceProvider(legacyDataSources, catalogDataSources);
            bindings.add(new NamedParamClass("datasourceProvider", DatasourceProvider.class.getName() + "[org.apache.spark.sql.DataFrame]", datasourceProvider));
        } else {
            throw log.throwing(new ScriptException("Script cannot be executed because no data source provider factory is available."));
        }
    }

    if (request.getCatalogDatasets() != null && !request.getCatalogDatasets().isEmpty()) {
        if (catalogDataSetProviderFactory != null) {
            log.info("Creating new shell task with {} data sets", request.getCatalogDatasets().size());
            final CatalogDataSetProvider catalogDataSetProvider = catalogDataSetProviderFactory.getDataSetProvider(request.getCatalogDatasets());
            bindings.add(new NamedParamClass("catalogDataSetProvider", CatalogDataSetProvider.class.getName() + "[org.apache.spark.sql.DataFrame]", catalogDataSetProvider));
        } else {
throw log.throwing(new ScriptException("Script cannot be executed because no data source provider factory is available."));
        }
    }

    // Ensure SessionState is valid
    if (SessionState.get() == null && sessionState != null) {
        SessionState.setCurrentSessionState(sessionState);
    }

    // Execute script
    final Object result;
    try {
        result = this.engine.eval(toTransformScript(request), bindings);
    } catch (final Exception cause) {
        throw log.throwing(new ScriptException(cause));
    }
    if (result instanceof DataSet) {
        return log.exit((DataSet) result);
    } else {
        throw log.throwing(new IllegalStateException("Unexpected script result type: " + (result != null ? result.getClass() : null)));
    }
}
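
Each NamedParamClass built above is what exposes a driver-side Java object to the interpreted Scala script: it pairs a variable name, a declared type string, and the value to bind under that name. The sketch below shows the mechanism in isolation, assuming the same SparkScriptEngine.eval(String, List&lt;NamedParam&gt;) signature exercised in the test earlier and Scala's REPL NamedParam types; the "message" binding and the one-line script are hypothetical.

import java.util.ArrayList;
import java.util.List;
import scala.tools.nsc.interpreter.NamedParam;
import scala.tools.nsc.interpreter.NamedParamClass;

public class BindingSketch {

    // Sketch only: binds a driver-side object into the evaluated script's scope.
    static void evalWithBinding(final SparkScriptEngine engine) throws Exception {
        final List<NamedParam> bindings = new ArrayList<>();
        // The script sees "message" as a variable of declared type java.lang.String
        bindings.add(new NamedParamClass("message", String.class.getName(), "hello from the driver"));
        engine.eval("println(message)", bindings);
    }
}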
Use of com.thinkbiganalytics.spark.shell.DatasourceProvider in project kylo by Teradata.
The class App, method main.
/**
 * Evaluates a Scala file.
 *
 * @param args the command-line arguments
 * @throws Exception if an error occurs
 */
public static void main(@Nonnull String[] args) throws Exception {
    // Verify arguments
    if (args.length != 1) {
        System.err.println("error: usage: SparkShellApp file");
        System.exit(1);
    }

    // Load environment
    final AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
    ctx.register(SecurityCoreConfig.class);
    ctx.scan("com.thinkbiganalytics.spark", "com.thinkbiganalytics.kylo.catalog");
    ctx.refresh();
    // Resolve the script file, falling back to the working directory
    File scriptFile = new File(args[0]);
    if (scriptFile.exists() && scriptFile.isFile()) {
        log.info("Loading script file at {}", args[0]);
    } else {
        log.info("Couldn't find script file at {}; will check the working directory.", args[0]);
        scriptFile = new File("./" + scriptFile.getName());
    }
    final String script = Files.toString(scriptFile, Charsets.UTF_8);

    // Prepare bindings
    final List<NamedParam> bindings = new ArrayList<>();
    final DatasourceProvider datasourceProvider = ctx.getBean(DatasourceProvider.class);
    bindings.add(new NamedParamClass("datasourceProvider", datasourceProvider.getClass().getName(), datasourceProvider));
    final CatalogDataSetProvider catalogDataSetProvider = ctx.getBean(CatalogDataSetProvider.class);
    bindings.add(new NamedParamClass("catalogDataSetProvider", catalogDataSetProvider.getClass().getName(), catalogDataSetProvider));

    // Execute script
    final SparkScriptEngine engine = ctx.getBean(SparkScriptEngine.class);
    engine.eval(script, bindings);
}
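
The context setup in main is the standard annotation-driven Spring bootstrap: register or scan configuration classes, refresh the context, then look beans up by type. A minimal, self-contained sketch of that pattern follows; GreetingConfig and its single bean are hypothetical stand-ins for the kylo configuration classes.

import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

public class ContextSketch {

    @Configuration
    static class GreetingConfig {

        @Bean
        public String greeting() {
            return "hello";
        }
    }

    public static void main(final String[] args) {
        final AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
        ctx.register(GreetingConfig.class);
        ctx.refresh();

        // Look the bean up by type, as App.main does for DatasourceProvider
        System.out.println(ctx.getBean(String.class));
        ctx.close();
    }
}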