Search in sources :

Example 1 with Profiler

Use of com.thinkbiganalytics.spark.dataprofiler.Profiler in project kylo by Teradata.

From the class TransformServiceTest, method executeWithDatasourceProviderFactory.

/**
 * Verify executing a transformation request with a data source provider factory.
 *
 * <p>The test submits a request that carries one data source, checks that the response is reported as
 * {@code PENDING}, and then inspects the arguments passed to the script engine: the generated script must
 * equal the contents of {@code transform-service-script1.scala} and the bindings must be the Spark context
 * service followed by the data source provider returned by the mocked factory.</p>
 *
 * @throws Exception if the service fails to execute the request or the expected script cannot be read
 */
@Test
@SuppressWarnings("unchecked")
public void executeWithDatasourceProviderFactory() throws Exception {
    // Mock data set
    final DataSet dataSet = Mockito.mock(DataSet.class);
    Mockito.when(dataSet.persist(Mockito.any(StorageLevel.class))).thenReturn(dataSet);
    Mockito.when(dataSet.schema()).thenReturn(new StructType());
    // Mock Spark context service
    final SparkContextService sparkContextService = Mockito.mock(SparkContextService.class);
    // Mock Spark script engine
    final SparkScriptEngine engine = Mockito.mock(SparkScriptEngine.class);
    Mockito.when(engine.eval(Mockito.anyString(), Mockito.anyListOf(NamedParam.class))).thenReturn(dataSet);
    Mockito.when(engine.getSparkContext()).thenReturn(Mockito.mock(SparkContext.class));
    // Mock data source provider factory
    final DatasourceProvider datasourceProvider = Mockito.mock(DatasourceProvider.class);
    final DatasourceProviderFactory datasourceProviderFactory = Mockito.mock(DatasourceProviderFactory.class);
    Mockito.when(datasourceProviderFactory.getDatasourceProvider(Mockito.anyCollectionOf(Datasource.class))).thenReturn(datasourceProvider);
    // Mock profiler
    final Profiler profiler = Mockito.mock(Profiler.class);
    // Test executing a request
    final TransformRequest request = new TransformRequest();
    request.setDatasources(Collections.singletonList(Mockito.mock(Datasource.class)));
    request.setScript("sqlContext.range(1,10)");
    final TransformService service = new TransformService(TransformScript.class, engine, sparkContextService, new MockJobTrackerService());
    service.setDatasourceProviderFactory(datasourceProviderFactory);
    service.setProfiler(profiler);
    final TransformResponse response = service.execute(request);
    Assert.assertEquals(TransformResponse.Status.PENDING, response.getStatus());
    // Test eval arguments
    final ArgumentCaptor<String> evalScript = ArgumentCaptor.forClass(String.class);
    final ArgumentCaptor<List> evalBindings = ArgumentCaptor.forClass(List.class);
    Mockito.verify(engine).eval(evalScript.capture(), evalBindings.capture());
    // Load the expected script; try-with-resources closes the stream even when reading fails
    final String expectedScript;
    try (InputStream inputStream = getClass().getResourceAsStream("transform-service-script1.scala")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message instead of an NPE
        Assert.assertNotNull("Missing test resource: transform-service-script1.scala", inputStream);
        expectedScript = IOUtils.toString(inputStream, "UTF-8");
    }
    Assert.assertEquals(expectedScript, evalScript.getValue());
    // The raw List captor is narrowed here, hence the method-level @SuppressWarnings("unchecked")
    final List<NamedParam> bindings = evalBindings.getValue();
    Assert.assertEquals(2, bindings.size());
    // First binding: the Spark context service handed to the TransformService constructor
    Assert.assertEquals("sparkContextService", bindings.get(0).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.SparkContextService", bindings.get(0).tpe());
    Assert.assertEquals(sparkContextService, bindings.get(0).value());
    // Second binding: the provider produced by the mocked data source provider factory
    Assert.assertEquals("datasourceProvider", bindings.get(1).name());
    Assert.assertEquals("com.thinkbiganalytics.spark.shell.DatasourceProvider[org.apache.spark.sql.DataFrame]", bindings.get(1).tpe());
    Assert.assertEquals(datasourceProvider, bindings.get(1).value());
}
Also used : Datasource(com.thinkbiganalytics.spark.rest.model.Datasource) DatasourceProvider(com.thinkbiganalytics.spark.shell.DatasourceProvider) StructType(org.apache.spark.sql.types.StructType) DataSet(com.thinkbiganalytics.spark.DataSet) InputStream(java.io.InputStream) DatasourceProviderFactory(com.thinkbiganalytics.spark.shell.DatasourceProviderFactory) NamedParam(scala.tools.nsc.interpreter.NamedParam) TransformRequest(com.thinkbiganalytics.spark.rest.model.TransformRequest) SparkContext(org.apache.spark.SparkContext) Profiler(com.thinkbiganalytics.spark.dataprofiler.Profiler) SparkContextService(com.thinkbiganalytics.spark.SparkContextService) TransformResponse(com.thinkbiganalytics.spark.rest.model.TransformResponse) List(java.util.List) SparkScriptEngine(com.thinkbiganalytics.spark.repl.SparkScriptEngine) StorageLevel(org.apache.spark.storage.StorageLevel) Test(org.junit.Test)

Aggregations

DataSet (com.thinkbiganalytics.spark.DataSet)1 SparkContextService (com.thinkbiganalytics.spark.SparkContextService)1 Profiler (com.thinkbiganalytics.spark.dataprofiler.Profiler)1 SparkScriptEngine (com.thinkbiganalytics.spark.repl.SparkScriptEngine)1 Datasource (com.thinkbiganalytics.spark.rest.model.Datasource)1 TransformRequest (com.thinkbiganalytics.spark.rest.model.TransformRequest)1 TransformResponse (com.thinkbiganalytics.spark.rest.model.TransformResponse)1 DatasourceProvider (com.thinkbiganalytics.spark.shell.DatasourceProvider)1 DatasourceProviderFactory (com.thinkbiganalytics.spark.shell.DatasourceProviderFactory)1 InputStream (java.io.InputStream)1 List (java.util.List)1 SparkContext (org.apache.spark.SparkContext)1 StructType (org.apache.spark.sql.types.StructType)1 StorageLevel (org.apache.spark.storage.StorageLevel)1 Test (org.junit.Test)1 NamedParam (scala.tools.nsc.interpreter.NamedParam)1