Search in sources:

Example 1 with SparkContextService

Use of com.thinkbiganalytics.spark.SparkContextService in project kylo by Teradata.

From the class TransformServiceTest, the method executeWithDatasourceProviderFactory:

/**
 * Verify executing a transformation request with a data source provider factory.
 *
 * <p>Mocks the Spark data set, context service, script engine, data source provider
 * factory, and profiler; submits a script through {@code TransformService.execute};
 * then asserts the response is PENDING and that the generated Scala script and the
 * two named bindings ({@code sparkContextService}, {@code datasourceProvider})
 * passed to the engine match expectations.</p>
 *
 * @throws Exception if the transformation request or resource loading fails
 */
@Test
@SuppressWarnings("unchecked")
public void executeWithDatasourceProviderFactory() throws Exception {
    // Mock data set: persist() returns itself so the service can chain on it
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());
    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);
    // Mock Spark script engine: any eval yields the mocked data set
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));
    // Mock data source provider factory
    final DatasourceProvider datasourceProvider = Mockito.mock(DatasourceProvider.class);
    final DatasourceProviderFactory datasourceProviderFactory = Mockito.mock(DatasourceProviderFactory.class);
    Mockito.when(datasourceProviderFactory.getDatasourceProvider(Mockito.anyCollectionOf(Datasource.class), Mockito.anyCollectionOf(DataSource.class))).thenReturn(datasourceProvider);
    // Mock profiler
    final Profiler profiler = Mockito.mock(Profiler.class);
    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDoProfile(true);
    request.setDatasources(Collections.singletonList(Mockito.mock(Datasource.class)));
    request.setScript("sqlContext.range(1,10)");
    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService(), Mockito.mock(DataSetConverterService.class), Mockito.mock(KyloCatalogClientBuilder.class));
    service.setDatasourceProviderFactory(datasourceProviderFactory);
    service.setProfiler(profiler);
    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());
    // Capture the script and bindings handed to the engine
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    // Load the expected script with try-with-resources so the stream is closed
    // even if reading throws (the previous manual close() leaked on exception),
    // and fail fast with a clear message if the resource is missing.
    final String expectedScript;
    try (InputStream inputStream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        Assert.assertNotNull("transform-service-script1.scala is missing from the test resources", inputStream);
        expectedScript = IOUtils.toString(inputStream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    // Two bindings expected: the context service, then the data source provider
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(2, bindings.size());
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
    Assert.assertEquals("datasourceProvider", bindings.get(1).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.shell.DatasourceProvider[org.apache.spark.sql.DataFrame]", bindings.get(1).tpe());
    Assert.assertEquals(datasourceProvider, bindings.get(1).value());
}
Also used : Datasource(com.thinkbiganalytics.spark.rest.model.Datasource) DatasourceProvider(com.thinkbiganalytics.spark.shell.DatasourceProvider) StructType(org.apache.spark.sql.types.StructType) DataSet(com.thinkbiganalytics.spark.DataSet) InputStream(java.io.InputStream) DatasourceProviderFactory(com.thinkbiganalytics.spark.shell.DatasourceProviderFactory) NamedParam(scala.tools.nsc.interpreter.NamedParam) TransformRequest(com.thinkbiganalytics.spark.rest.model.TransformRequest) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) SparkContext(org.apache.spark.SparkContext) Profiler(com.thinkbiganalytics.spark.dataprofiler.Profiler) KyloCatalogClientBuilder(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder) SparkContextService(com.thinkbiganalytics.spark.SparkContextService) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) List(java.util.List) SparkScriptEngine(com.thinkbiganalytics.spark.repl.SparkScriptEngine) StorageLevel(org.apache.spark.storage.StorageLevel) Test(org.junit.Test)

Example 2 with SparkContextService

Use of com.thinkbiganalytics.spark.SparkContextService in project kylo by Teradata.

From the class TransformServiceTest, the method execute:

/**
 * Verify executing a basic transformation request (no data source provider factory).
 *
 * <p>Mocks the Spark data set, context service, and script engine; submits a script
 * through {@code TransformService.execute}; then asserts the response is PENDING and
 * that the generated Scala script and the single {@code sparkContextService} binding
 * passed to the engine match expectations.</p>
 *
 * @throws Exception if the transformation request or resource loading fails
 */
@Test
@SuppressWarnings("unchecked")
public void execute() throws Exception {
    // Mock data set: persist() returns itself so the service can chain on it
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());
    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);
    // Mock Spark script engine: any eval yields the mocked data set
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));
    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDoProfile(true);
    request.setScript("sqlContext.range(1,10)");
    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService(), Mockito.mock(DataSetConverterService.class), Mockito.mock(KyloCatalogClientBuilder.class));
    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());
    // Capture the script and bindings handed to the engine
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    // Assert the stream itself is non-null before reading: IOUtils.toString never
    // returns null, so the old post-read null check was dead code — a missing
    // resource would have surfaced as an opaque NPE inside toString() instead.
    final String expectedScript;
    try (InputStream stream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        Assert.assertNotNull("transform-service-script1.scala is missing from the test resources", stream);
        expectedScript = IOUtils.toString(stream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    // One binding expected: the Spark context service
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(1, bindings.size());
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
}
Also used : StructType(org.apache.spark.sql.types.StructType) DataSet(com.thinkbiganalytics.spark.DataSet) InputStream(java.io.InputStream) NamedParam(scala.tools.nsc.interpreter.NamedParam) TransformRequest(com.thinkbiganalytics.spark.rest.model.TransformRequest) SparkContext(org.apache.spark.SparkContext) KyloCatalogClientBuilder(com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder) SparkContextService(com.thinkbiganalytics.spark.SparkContextService) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) List(java.util.List) SparkScriptEngine(com.thinkbiganalytics.spark.repl.SparkScriptEngine) StorageLevel(org.apache.spark.storage.StorageLevel) Test(org.junit.Test)

Aggregations

KyloCatalogClientBuilder (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClientBuilder)2 DataSet (com.thinkbiganalytics.spark.DataSet)2 SparkContextService (com.thinkbiganalytics.spark.SparkContextService)2 SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine)2 TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest)2 TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse)2 InputStream (java.io.InputStream)2 List (java.util.List)2 SparkContext (org.apache.spark.SparkContext)2 StructType (org.apache.spark.sql.types.StructType)2 StorageLevel (org.apache.spark.storage.StorageLevel)2 Test (org.junit.Test)2 NamedParam (scala.tools.nsc.interpreter.NamedParam)2 DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource)1 Profiler (com.thinkbiganalytics.spark.dataprofiler.Profiler)1 Datasource (com.thinkbiganalytics.spark.rest.model.Datasource)1 DatasourceProvider (com.thinkbiganalytics.spark.shell.DatasourceProvider)1 DatasourceProviderFactory (com.thinkbiganalytics.spark.shell.DatasourceProviderFactory)1