Example 36 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From the class Spark2Test, method testScalaSparkWithObjectStore.

@Test
public void testScalaSparkWithObjectStore() throws Exception {
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
    prepareInputData(keysManager);
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), ObjectStore (co.cask.cdap.api.dataset.lib.ObjectStore), SparkAppUsingObjectStore (co.cask.cdap.spark.app.SparkAppUsingObjectStore), SparkManager (co.cask.cdap.test.SparkManager), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), Test (org.junit.Test)
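
The prepareInputData and checkOutputData helpers are not shown. A minimal sketch of what they might do, assuming ScalaCharCountProgram writes each input string's length to the "count" table under the string's own bytes as the key (Bytes here is co.cask.cdap.api.common.Bytes; the fixture strings are hypothetical):

// Hypothetical fixtures; the actual values are defined elsewhere in the test class.
private static final String TEST_STRING_1 = "persisted data";
private static final String TEST_STRING_2 = "distributed systems";

private void prepareInputData(DataSetManager<ObjectStore<String>> manager) {
    ObjectStore<String> keys = manager.get();
    // Store each input string under its own bytes as the key.
    keys.write(Bytes.toBytes(TEST_STRING_1), TEST_STRING_1);
    keys.write(Bytes.toBytes(TEST_STRING_2), TEST_STRING_2);
    // Make the writes visible to the Spark program.
    manager.flush();
}

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
    KeyValueTable count = manager.get();
    // Assumes the program wrote each string's length, encoded as an int.
    byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
    Assert.assertNotNull(val);
    Assert.assertEquals(TEST_STRING_1.length(), Bytes.toInt(val));
}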

Example 37 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From the class DataCleansingMapReduceTest, method testPartitionConsuming.

@Test
public void testPartitionConsuming() throws Exception {
    ApplicationManager applicationManager = deployApplication(DataCleansing.class);
    ServiceManager serviceManager = applicationManager.getServiceManager(DataCleansingService.NAME).start();
    serviceManager.waitForStatus(true);
    URL serviceURL = serviceManager.getServiceURL();
    // write a set of records to one partition and run the DataCleansingMapReduce job on that one partition
    createPartition(serviceURL, RECORD_SET1);
    // before starting the MR, there are 0 invalid records and 0 valid records, according to metrics
    Assert.assertEquals(0, getValidityMetrics(true));
    Assert.assertEquals(0, getValidityMetrics(false));
    Long now = System.currentTimeMillis();
    ImmutableMap<String, String> args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(RECORD_SET1, true));
    compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(RECORD_SET1, false));
    // assert that some of the records have indeed been filtered
    Assert.assertNotEquals(filterRecords(RECORD_SET1, true), RECORD_SET1);
    Assert.assertNotEquals(filterRecords(RECORD_SET1, false), Collections.<String>emptySet());
    // verify this via metrics
    Assert.assertEquals(1, getValidityMetrics(true));
    Assert.assertEquals(1, getValidityMetrics(false));
    // create two additional partitions
    createPartition(serviceURL, RECORD_SET2);
    createPartition(serviceURL, RECORD_SET3);
    // running the MapReduce job now processes these two new partitions (RECORD_SET2 and RECORD_SET3) and creates a new
    // partition with the output
    now = System.currentTimeMillis();
    args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
    ImmutableSet<String> recordSets2and3 = ImmutableSet.<String>builder().addAll(RECORD_SET2).addAll(RECORD_SET3).build();
    compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(recordSets2and3, true));
    compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(recordSets2and3, false));
    // verify this via metrics
    Assert.assertEquals(1, getValidityMetrics(true));
    Assert.assertEquals(5, getValidityMetrics(false));
    // running the MapReduce job without adding new partitions creates no additional output
    now = System.currentTimeMillis();
    args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
    compareData(now, DataCleansing.CLEAN_RECORDS, Collections.<String>emptySet());
    compareData(now, DataCleansing.INVALID_RECORDS, Collections.<String>emptySet());
    // verify that the records were properly partitioned on their zip
    DataSetManager<PartitionedFileSet> cleanRecords = getDataset(DataCleansing.CLEAN_RECORDS);
    PartitionFilter filter = PartitionFilter.builder().addValueCondition("zip", 84125).build();
    Assert.assertEquals(ImmutableSet.of(RECORD1, RECORD4, RECORD6), getDataFromFilter(cleanRecords.get(), filter));
    filter = PartitionFilter.builder().addValueCondition("zip", 84126).build();
    Assert.assertEquals(ImmutableSet.of(RECORD3, RECORD5), getDataFromFilter(cleanRecords.get(), filter));
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter), ServiceManager (co.cask.cdap.test.ServiceManager), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), URL (java.net.URL), Test (org.junit.Test)
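
The createPartition helper is not shown. A plausible sketch, assuming the DataCleansingService exposes an endpoint that writes the posted record set into a new partition of the raw-records dataset; the "v1/records/raw" path is an assumption, not confirmed by the snippet:

private void createPartition(URL serviceUrl, String recordSet) throws IOException {
    // Hypothetical handler route; the actual path may differ.
    URL url = new URL(serviceUrl, "v1/records/raw");
    HttpRequest request = HttpRequest.post(url).withBody(recordSet).build();
    HttpResponse response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
}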

Example 38 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From the class DecisionTreeRegressionAppTest, method test.

@Test
public void test() throws Exception {
    // Deploy the Application
    ApplicationManager appManager = deployApplication(DecisionTreeRegressionApp.class);
    // Start the Service
    ServiceManager serviceManager = appManager.getServiceManager(ModelDataService.SERVICE_NAME).start();
    serviceManager.waitForStatus(true, 30, 1);
    URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);
    URL addDataURL = new URL(serviceURL, "labels");
    HttpRequest request = HttpRequest.builder(HttpMethod.PUT, addDataURL).withBody(new InputSupplier<InputStream>() {

        @Override
        public InputStream getInput() throws IOException {
            return getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt");
        }
    }).build();
    HttpResponse response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    // Start a Spark Program
    SparkManager sparkManager = appManager.getSparkManager(ModelTrainer.NAME).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
    // Check that there is a new model
    URL listModelsURL = new URL(serviceURL, "models");
    request = HttpRequest.builder(HttpMethod.GET, listModelsURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    List<String> models = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<List<String>>() {
    }.getType());
    Assert.assertEquals(1, models.size());
    // Check that there is some model metadata
    String modelId = models.get(0);
    URL modelMetaURL = new URL(serviceURL, "models/" + modelId);
    request = HttpRequest.builder(HttpMethod.GET, modelMetaURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    ModelMeta meta = GSON.fromJson(response.getResponseBodyAsString(), ModelMeta.class);
    Assert.assertNotNull(meta);
    Assert.assertEquals(0.7, meta.getTrainingPercentage(), 0.000001);
    Assert.assertEquals(692, meta.getNumFeatures());
    // Check that the corresponding model file exists
    DataSetManager<FileSet> modelFiles = getDataset(DecisionTreeRegressionApp.MODEL_DATASET);
    Assert.assertTrue(modelFiles.get().getBaseLocation().append(modelId).exists());
}
Also used: HttpRequest (co.cask.common.http.HttpRequest), ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), FileSet (co.cask.cdap.api.dataset.lib.FileSet), HttpResponse (co.cask.common.http.HttpResponse), URL (java.net.URL), ServiceManager (co.cask.cdap.test.ServiceManager), TypeToken (com.google.gson.reflect.TypeToken), InputSupplier (com.google.common.io.InputSupplier), Test (org.junit.Test)
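
The metadata response is deserialized into a ModelMeta bean. A minimal sketch of the fields this test relies on; the names are assumed to match the JSON keys emitted by the service, and the real class likely carries more fields:

public class ModelMeta {
    // Fraction of the data used for training, e.g. 0.7.
    private double trainingPercentage;
    // Number of features in the LIBSVM input, e.g. 692.
    private int numFeatures;

    public double getTrainingPercentage() {
        return trainingPercentage;
    }

    public int getNumFeatures() {
        return numFeatures;
    }
}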

Example 39 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From the class LogAnalysisAppTest, method test.

@Test
public void test() throws Exception {
    // Deploy the App
    ApplicationManager appManager = deployApplication(LogAnalysisApp.class);
    // Send stream events to the Stream
    StreamManager streamManager = getStreamManager(LogAnalysisApp.LOG_STREAM);
    streamManager.send(LOG_1);
    streamManager.send(LOG_2);
    streamManager.send(LOG_3);
    // run the spark program
    SparkManager sparkManager = appManager.getSparkManager(LogAnalysisApp.ResponseCounterSpark.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
    // run the mapreduce job
    MapReduceManager mapReduceManager = appManager.getMapReduceManager(HitCounterProgram.class.getSimpleName()).start();
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    // start and wait for services
    ServiceManager hitCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.HIT_COUNTER_SERVICE);
    ServiceManager responseCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.RESPONSE_COUNTER_SERVICE);
    ServiceManager requestCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.REQUEST_COUNTER_SERVICE);
    // Query for hit counts and verify it
    URL totalHitsURL = new URL(hitCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.HitCounterServiceHandler.HIT_COUNTER_SERVICE_PATH);
    HttpResponse response = HttpRequests.execute(HttpRequest.post(totalHitsURL).withBody("{\"url\":\"" + "/home.html" + "\"}").build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    Assert.assertEquals(TOTAL_HITS_VALUE, response.getResponseBodyAsString());
    // query for total responses for a response code and verify it
    URL responseCodeURL = new URL(responseCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.ResponseCounterHandler.RESPONSE_COUNT_PATH + "/" + RESPONSE_CODE);
    HttpRequest request = HttpRequest.get(responseCodeURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(TOTAL_RESPONSE_VALUE, response.getResponseBodyAsString());
    // query to get partitions in the request count tpfs
    URL requestCountFilesetsURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_COUNTER_PARTITIONS_PATH);
    request = HttpRequest.get(requestCountFilesetsURL).build();
    response = HttpRequests.execute(request);
    TreeSet<String> partitions = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<TreeSet<String>>() {
    }.getType());
    Assert.assertEquals(1, partitions.size());
    String partition = partitions.iterator().next();
    // Query for the contents of the files in this partition and verify
    URL requestFilesetContentURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_CONTENT_PATH);
    response = HttpRequests.execute(HttpRequest.post(requestFilesetContentURL).withBody("{\"" + LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_PATH_HANDLER_KEY + "\":\"" + partition + "\"}").build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    Map<String, Integer> responseMap = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<Map<String, Integer>>() {
    }.getType());
    Assert.assertEquals(TPFS_RESULT, responseMap);
}
Also used: HttpRequest (co.cask.common.http.HttpRequest), ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), HttpResponse (co.cask.common.http.HttpResponse), URL (java.net.URL), StreamManager (co.cask.cdap.test.StreamManager), ServiceManager (co.cask.cdap.test.ServiceManager), TypeToken (com.google.common.reflect.TypeToken), Test (org.junit.Test)
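
getServiceManager here is a private helper of the test class rather than a CDAP framework method. Based on how the returned managers are used above, it plausibly starts the service and waits for it to report running:

private ServiceManager getServiceManager(ApplicationManager appManager, String serviceName)
    throws InterruptedException {
    // Start the service and block until its status flips to running.
    ServiceManager serviceManager = appManager.getServiceManager(serviceName).start();
    serviceManager.waitForStatus(true);
    return serviceManager;
}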

Example 40 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From the class WikipediaPipelineAppTest, method test.

@Test
@Category(XSlowTests.class)
public void test() throws Exception {
    WikipediaPipelineApp.WikipediaAppConfig appConfig = new WikipediaPipelineApp.WikipediaAppConfig();
    AppRequest<WikipediaPipelineApp.WikipediaAppConfig> appRequest = new AppRequest<>(ARTIFACT_SUMMARY, appConfig);
    ApplicationManager appManager = deployApplication(APP_ID, appRequest);
    // Setup input streams with test data
    createTestData();
    WorkflowManager workflowManager = appManager.getWorkflowManager(WikipediaPipelineWorkflow.NAME);
    // Test with default threshold. Workflow should not proceed beyond first condition.
    testWorkflow(workflowManager, appConfig, 1);
    // Test with a reduced threshold, so the workflow proceeds beyond the first predicate
    testWorkflow(workflowManager, appConfig, 2, 1);
    // Test K-Means
    appConfig = new WikipediaPipelineApp.WikipediaAppConfig("kmeans");
    appRequest = new AppRequest<>(ARTIFACT_SUMMARY, appConfig);
    appManager = deployApplication(APP_ID, appRequest);
    workflowManager = appManager.getWorkflowManager(WikipediaPipelineWorkflow.NAME);
    testWorkflow(workflowManager, appConfig, 3, 1);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), WorkflowManager (co.cask.cdap.test.WorkflowManager), AppRequest (co.cask.cdap.proto.artifact.AppRequest), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
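
testWorkflow appears in two arities above. A plausible sketch, assuming the third argument is the expected total of completed runs and the optional fourth argument lowers the condition's threshold through a runtime argument (the "min.pages.threshold" key is an assumption):

private void testWorkflow(WorkflowManager workflowManager, WikipediaPipelineApp.WikipediaAppConfig config,
                          int expectedRuns) throws Exception {
    testWorkflow(workflowManager, config, expectedRuns, null);
}

private void testWorkflow(WorkflowManager workflowManager, WikipediaPipelineApp.WikipediaAppConfig config,
                          int expectedRuns, Integer threshold) throws Exception {
    Map<String, String> args = new HashMap<>();
    if (threshold != null) {
        // Hypothetical runtime-argument key for overriding the condition's page-count threshold.
        args.put("min.pages.threshold", String.valueOf(threshold));
    }
    workflowManager.start(args);
    // Wait until the workflow's run history shows the expected number of completed runs.
    workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedRuns, 5, TimeUnit.MINUTES);
}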

Aggregations

ApplicationManager (co.cask.cdap.test.ApplicationManager): 188
Test (org.junit.Test): 155
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 88
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 71
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 61
WorkflowManager (co.cask.cdap.test.WorkflowManager): 59
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 58
SparkManager (co.cask.cdap.test.SparkManager): 52
Table (co.cask.cdap.api.dataset.table.Table): 50
ServiceManager (co.cask.cdap.test.ServiceManager): 48
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 47
Schema (co.cask.cdap.api.data.schema.Schema): 47
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 45
StreamManager (co.cask.cdap.test.StreamManager): 43
URL (java.net.URL): 33
HashSet (java.util.HashSet): 27
ArrayList (java.util.ArrayList): 26
IOException (java.io.IOException): 25
HashMap (java.util.HashMap): 24
Set (java.util.Set): 24