
Example 1 with SparkScriptEngine

Use of com.thinkbiganalytics.spark.repl.SparkScriptEngine in the kylo project by Teradata.

From the class TransformServiceTest, method executeWithDatasourceProviderFactory.

/**
 * Verify executing a transformation request with a data source provider factory.
 */
@Test
@SuppressWarnings("unchecked")
public void executeWithDatasourceProviderFactory() throws Exception {
    // Mock data set
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());
    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);
    // Mock Spark script engine
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));
    // Mock data source provider factory
    final DatasourceProvider datasourceProvider = Mockito.mock(DatasourceProvider.class);
    final DatasourceProviderFactory datasourceProviderFactory = Mockito.mock(DatasourceProviderFactory.class);
    Mockito.when(datasourceProviderFactory.getDatasourceProvider(Mockito.anyCollectionOf(Datasource.class), Mockito.anyCollectionOf(DataSource.class))).thenReturn(datasourceProvider);
    // Mock profiler
    final Profiler profiler = Mockito.mock(Profiler.class);
    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDoProfile(true);
    request.setDatasources(Collections.singletonList(Mockito.mock(Datasource.class)));
    request.setScript("sqlContext.range(1,10)");
    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService(), Mockito.mock(DataSetConverterService.class), Mockito.mock(KyloCatalogClientBuilder.class));
    service.setDatasourceProviderFactory(datasourceProviderFactory);
    service.setProfiler(profiler);
    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());
    // Test eval arguments
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    final String expectedScript;
    try (InputStream inputStream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        expectedScript = IOUtils.toString(inputStream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(2, bindings.size());
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
    Assert.assertEquals("datasourceProvider", bindings.get(1).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.shell.DatasourceProvider[org.apache.spark.sql.DataFrame]", bindings.get(1).tpe());
    Assert.assertEquals(datasourceProvider, bindings.get(1).value());
}
Also used: Datasource (com.thinkbiganalytics.spark.rest.model.Datasource), DatasourceProvider (com.thinkbiganalytics.spark.shell.DatasourceProvider), StructType (org.apache.spark.sql.types.StructType), DataSet (com.thinkbiganalytics.spark.DataSet), InputStream (java.io.InputStream), DatasourceProviderFactory (com.thinkbiganalytics.spark.shell.DatasourceProviderFactory), NamedParam (scala.tools.nsc.interpreter.NamedParam), TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest), DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource), SparkContext (org.apache.spark.SparkContext), Profiler (com.thinkbiganalytics.spark.dataprofiler.Profiler), KyloCatalogClientBuilder (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder), SparkContextService (com.thinkbiganalytics.spark.SparkContextService), TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse), List (java.util.List), SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine), StorageLevel (org.apache.spark.storage.StorageLevel), Test (org.junit.Test)
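The two bindings verified above are typed REPL parameters (scala.tools.nsc.interpreter.NamedParam). As a sketch of how such bindings can be constructed, here is the NamedParamClass constructor that Example 3 below uses for its own bindings; whether TransformService assembles them exactly this way is not shown in this listing, and sparkContextService and datasourceProvider refer to the mocks from the test above:

// Illustrative sketch: build the two typed bindings the assertions above expect.
// NamedParamClass(name, typeName, value) yields a NamedParam that the evaluated
// Scala script can reference by name, with the declared Scala type.
final List<NamedParam> expectedBindings = new ArrayList<>();
expectedBindings.add(new NamedParamClass("sparkContextService",
    "com.thinkbiganalytics.spark.SparkContextService", sparkContextService));
expectedBindings.add(new NamedParamClass("datasourceProvider",
    "com.thinkbiganalytics.spark.shell.DatasourceProvider[org.apache.spark.sql.DataFrame]",
    datasourceProvider));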

Example 2 with SparkScriptEngine

Use of com.thinkbiganalytics.spark.repl.SparkScriptEngine in the kylo project by Teradata.

From the class TransformServiceTest, method execute.

/**
 * Verify executing a transformation request.
 */
@Test
@SuppressWarnings("unchecked")
public void execute() throws Exception {
    // Mock data set
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());
    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);
    // Mock Spark script engine
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));
    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDoProfile(true);
    request.setScript("sqlContext.range(1,10)");
    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService(), Mockito.mock(DataSetConverterService.class), Mockito.mock(KyloCatalogClientBuilder.class));
    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());
    // Test eval arguments
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    final String expectedScript;
    try (InputStream stream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        // Guard the missing-resource case up front: IOUtils.toString would throw a
        // NullPointerException on a null stream, so check the stream, not the result.
        if (stream == null) {
            throw new IllegalStateException("transform-service-script1.scala not found on the classpath");
        }
        expectedScript = IOUtils.toString(stream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(1, bindings.size());
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
}
Also used: StructType (org.apache.spark.sql.types.StructType), DataSet (com.thinkbiganalytics.spark.DataSet), InputStream (java.io.InputStream), NamedParam (scala.tools.nsc.interpreter.NamedParam), TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest), SparkContext (org.apache.spark.SparkContext), KyloCatalogClientBuilder (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder), SparkContextService (com.thinkbiganalytics.spark.SparkContextService), TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse), List (java.util.List), SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine), StorageLevel (org.apache.spark.storage.StorageLevel), Test (org.junit.Test)
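Both tests load the expected generated script from a classpath resource next to the test class. A small helper could centralize that pattern; the name readResource is hypothetical and not part of the Kylo sources shown here:

// Hypothetical helper: read a classpath resource as a UTF-8 string, failing fast
// when the resource is missing (IOUtils.toString would NPE on a null stream).
private String readResource(final String name) throws IOException {
    try (InputStream stream = getClass().getResourceAsStream(name)) {
        if (stream == null) {
            throw new IllegalStateException("Resource not found: " + name);
        }
        return IOUtils.toString(stream, "UTF-8");
    }
}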

Example 3 with SparkScriptEngine

Use of com.thinkbiganalytics.spark.repl.SparkScriptEngine in the kylo project by Teradata.

From the class App, method main.

/**
 * Evaluates a Scala file.
 *
 * @param args the command-line arguments
 * @throws Exception if an error occurs
 */
public static void main(@Nonnull String[] args) throws Exception {
    // Verify arguments
    if (args.length != 1) {
        System.err.println("error: usage: SparkShellApp file");
        System.exit(1);
    }
    // Load environment
    final AnnotationConfigApplicationContext ctx = new AnnotationConfigApplicationContext();
    ctx.register(SecurityCoreConfig.class);
    ctx.scan("com.thinkbiganalytics.spark", "com.thinkbiganalytics.kylo.catalog");
    ctx.refresh();
    File scriptFile = new File(args[0]);
    if (scriptFile.isFile()) {
        log.info("Loading script file at {}", args[0]);
    } else {
        // Fall back to a file of the same name in the working directory.
        log.info("Couldn't find script file at {}; looking in the working directory.", args[0]);
        scriptFile = new File("./" + scriptFile.getName());
    }
    final String script = Files.toString(scriptFile, Charsets.UTF_8);
    // Prepare bindings
    final List<NamedParam> bindings = new ArrayList<>();
    final DatasourceProvider datasourceProvider = ctx.getBean(DatasourceProvider.class);
    bindings.add(new NamedParamClass("datasourceProvider", datasourceProvider.getClass().getName(), datasourceProvider));
    final CatalogDataSetProvider catalogDataSetProvider = ctx.getBean(CatalogDataSetProvider.class);
    bindings.add(new NamedParamClass("catalogDataSetProvider", catalogDataSetProvider.getClass().getName(), catalogDataSetProvider));
    // Execute script
    final SparkScriptEngine engine = ctx.getBean(SparkScriptEngine.class);
    engine.eval(script, bindings);
}
Also used: DatasourceProvider (com.thinkbiganalytics.spark.shell.DatasourceProvider), AnnotationConfigApplicationContext (org.springframework.context.annotation.AnnotationConfigApplicationContext), NamedParamClass (scala.tools.nsc.interpreter.NamedParamClass), ArrayList (java.util.ArrayList), NamedParam (scala.tools.nsc.interpreter.NamedParam), SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine), CatalogDataSetProvider (com.thinkbiganalytics.spark.shell.CatalogDataSetProvider), File (java.io.File)
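The same pattern works for evaluating an inline snippet rather than a file: fetch the SparkScriptEngine bean and hand it a script plus typed bindings. A minimal sketch; the binding name, its declared type, and the script body below are illustrative, not taken from the Kylo sources:

// Illustrative only: evaluate an inline Scala snippet with one typed binding.
final SparkScriptEngine engine = ctx.getBean(SparkScriptEngine.class);
final List<NamedParam> bindings = new ArrayList<>();
bindings.add(new NamedParamClass("answer", "java.lang.Integer", 42));
engine.eval("println(answer)", bindings);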

Aggregations

SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine): 3
NamedParam (scala.tools.nsc.interpreter.NamedParam): 3
KyloCatalogClientBuilder (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder): 2
DataSet (com.thinkbiganalytics.spark.DataSet): 2
SparkContextService (com.thinkbiganalytics.spark.SparkContextService): 2
TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest): 2
TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse): 2
DatasourceProvider (com.thinkbiganalytics.spark.shell.DatasourceProvider): 2
InputStream (java.io.InputStream): 2
List (java.util.List): 2
SparkContext (org.apache.spark.SparkContext): 2
StructType (org.apache.spark.sql.types.StructType): 2
StorageLevel (org.apache.spark.storage.StorageLevel): 2
Test (org.junit.Test): 2
DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource): 1
Profiler (com.thinkbiganalytics.spark.dataprofiler.Profiler): 1
Datasource (com.thinkbiganalytics.spark.rest.model.Datasource): 1
CatalogDataSetProvider (com.thinkbiganalytics.spark.shell.CatalogDataSetProvider): 1
DatasourceProviderFactory (com.thinkbiganalytics.spark.shell.DatasourceProviderFactory): 1
File (java.io.File): 1