Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.
From the class Spark2Test, method testScalaSparkWithObjectStore.
@Test
public void testScalaSparkWithObjectStore() throws Exception {
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
  DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
  prepareInputData(keysManager);
  SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
  DataSetManager<KeyValueTable> countManager = getDataset("count");
  checkOutputData(countManager);
}
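prepareInputData and checkOutputData are private helpers of Spark2Test that are not shown above. A minimal sketch of what they could look like, assuming the ObjectStore holds input strings under byte[] keys and the Scala program writes per-character counts into the KeyValueTable (Bytes here is co.cask.cdap.api.common.Bytes; the keys and values are illustrative only, not taken from the actual test):

private void prepareInputData(DataSetManager<ObjectStore<String>> keysManager) throws Exception {
  // write a couple of input strings into the "keys" ObjectStore and persist the change
  ObjectStore<String> keys = keysManager.get();
  keys.write(Bytes.toBytes("line1"), "abc");
  keys.write(Bytes.toBytes("line2"), "abbc");
  keysManager.flush();
}

private void checkOutputData(DataSetManager<KeyValueTable> countManager) throws Exception {
  // the Spark program is expected to have written one counter per character it saw
  KeyValueTable counts = countManager.get();
  Assert.assertNotNull(counts.read("a"));
  Assert.assertNotNull(counts.read("b"));
}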
Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.
From the class DataCleansingMapReduceTest, method testPartitionConsuming.
@Test
public void testPartitionConsuming() throws Exception {
  ApplicationManager applicationManager = deployApplication(DataCleansing.class);
  ServiceManager serviceManager = applicationManager.getServiceManager(DataCleansingService.NAME).start();
  serviceManager.waitForStatus(true);
  URL serviceURL = serviceManager.getServiceURL();
  // write a set of records to one partition and run the DataCleansingMapReduce job on that one partition
  createPartition(serviceURL, RECORD_SET1);
  // before starting the MR, there are 0 invalid records and 0 valid records, according to metrics
  Assert.assertEquals(0, getValidityMetrics(true));
  Assert.assertEquals(0, getValidityMetrics(false));
  Long now = System.currentTimeMillis();
  ImmutableMap<String, String> args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(RECORD_SET1, true));
  compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(RECORD_SET1, false));
  // assert that some of the records have indeed been filtered
  Assert.assertNotEquals(filterRecords(RECORD_SET1, true), RECORD_SET1);
  Assert.assertNotEquals(filterRecords(RECORD_SET1, false), Collections.<String>emptySet());
  // verify this via metrics
  Assert.assertEquals(1, getValidityMetrics(true));
  Assert.assertEquals(1, getValidityMetrics(false));
  // create two additional partitions
  createPartition(serviceURL, RECORD_SET2);
  createPartition(serviceURL, RECORD_SET3);
  // running the MapReduce job now processes these two new partitions (RECORD_SET2 and RECORD_SET3) and creates a new
  // partition with the output
  now = System.currentTimeMillis();
  args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
  ImmutableSet<String> recordSets2and3 = ImmutableSet.<String>builder().addAll(RECORD_SET2).addAll(RECORD_SET3).build();
  compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(recordSets2and3, true));
  compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(recordSets2and3, false));
  // verify this via metrics
  Assert.assertEquals(1, getValidityMetrics(true));
  Assert.assertEquals(5, getValidityMetrics(false));
  // running the MapReduce job without adding new partitions creates no additional output
  now = System.currentTimeMillis();
  args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
  compareData(now, DataCleansing.CLEAN_RECORDS, Collections.<String>emptySet());
  compareData(now, DataCleansing.INVALID_RECORDS, Collections.<String>emptySet());
  // verify that the records were properly partitioned on their zip
  DataSetManager<PartitionedFileSet> cleanRecords = getDataset(DataCleansing.CLEAN_RECORDS);
  PartitionFilter filter = PartitionFilter.builder().addValueCondition("zip", 84125).build();
  Assert.assertEquals(ImmutableSet.of(RECORD1, RECORD4, RECORD6), getDataFromFilter(cleanRecords.get(), filter));
  filter = PartitionFilter.builder().addValueCondition("zip", 84126).build();
  Assert.assertEquals(ImmutableSet.of(RECORD3, RECORD5), getDataFromFilter(cleanRecords.get(), filter));
}
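createPartition, filterRecords, getValidityMetrics and getDataFromFilter are helpers of the test class and are not shown. As a rough sketch, createPartition might simply POST the records to the DataCleansingService so the service writes them into a new partition of the raw-records dataset; the endpoint path "v1/records/raw" and the Set<String> parameter type are assumptions, not taken from the source:

private void createPartition(URL serviceUrl, Set<String> records) throws IOException {
  // hypothetical endpoint: the service appends the posted records as a new partition
  URL url = new URL(serviceUrl, "v1/records/raw");
  HttpRequest request = HttpRequest.post(url).withBody(Joiner.on("\n").join(records)).build();
  HttpResponse response = HttpRequests.execute(request);
  Assert.assertEquals(200, response.getResponseCode());
}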
Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.
From the class DecisionTreeRegressionAppTest, method test.
@Test
public void test() throws Exception {
  // Deploy the Application
  ApplicationManager appManager = deployApplication(DecisionTreeRegressionApp.class);
  // Start the Service
  ServiceManager serviceManager = appManager.getServiceManager(ModelDataService.SERVICE_NAME).start();
  serviceManager.waitForStatus(true, 30, 1);
  URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);
  URL addDataURL = new URL(serviceURL, "labels");
  HttpRequest request = HttpRequest.builder(HttpMethod.PUT, addDataURL).withBody(new InputSupplier<InputStream>() {
    @Override
    public InputStream getInput() throws IOException {
      return getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt");
    }
  }).build();
  HttpResponse response = HttpRequests.execute(request);
  Assert.assertEquals(200, response.getResponseCode());
  // Start a Spark Program
  SparkManager sparkManager = appManager.getSparkManager(ModelTrainer.NAME).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  // Check that there is a new model
  URL listModelsURL = new URL(serviceURL, "models");
  request = HttpRequest.builder(HttpMethod.GET, listModelsURL).build();
  response = HttpRequests.execute(request);
  Assert.assertEquals(200, response.getResponseCode());
  List<String> models = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<List<String>>() {
  }.getType());
  Assert.assertEquals(1, models.size());
  // Check that there is some model metadata
  String modelId = models.get(0);
  URL modelMetaURL = new URL(serviceURL, "models/" + modelId);
  request = HttpRequest.builder(HttpMethod.GET, modelMetaURL).build();
  response = HttpRequests.execute(request);
  Assert.assertEquals(200, response.getResponseCode());
  ModelMeta meta = GSON.fromJson(response.getResponseBodyAsString(), ModelMeta.class);
  Assert.assertNotNull(meta);
  Assert.assertEquals(0.7, meta.getTrainingPercentage(), 0.000001);
  Assert.assertEquals(692, meta.getNumFeatures());
  // Check that the corresponding model file exists
  DataSetManager<FileSet> modelFiles = getDataset(DecisionTreeRegressionApp.MODEL_DATASET);
  Assert.assertTrue(modelFiles.get().getBaseLocation().append(modelId).exists());
}
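The anonymous InputSupplier above streams the bundled sample_libsvm_data.txt resource as the PUT body. If you prefer not to use the Guava supplier, an equivalent sketch (assuming the builder's String withBody overload, as used in the LogAnalysisApp example below; ByteStreams is Guava) is to read the resource into memory first:

// read the LIBSVM sample into a String and send it as the request body
String libsvmData = new String(
  ByteStreams.toByteArray(getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt")),
  StandardCharsets.UTF_8);
HttpRequest request = HttpRequest.builder(HttpMethod.PUT, addDataURL).withBody(libsvmData).build();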
Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.
From the class LogAnalysisAppTest, method test.
@Test
public void test() throws Exception {
  // Deploy the App
  ApplicationManager appManager = deployApplication(LogAnalysisApp.class);
  // Send stream events to the Stream
  StreamManager streamManager = getStreamManager(LogAnalysisApp.LOG_STREAM);
  streamManager.send(LOG_1);
  streamManager.send(LOG_2);
  streamManager.send(LOG_3);
  // run the spark program
  SparkManager sparkManager = appManager.getSparkManager(LogAnalysisApp.ResponseCounterSpark.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  // run the mapreduce job
  MapReduceManager mapReduceManager = appManager.getMapReduceManager(HitCounterProgram.class.getSimpleName()).start();
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
  // start and wait for services
  ServiceManager hitCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.HIT_COUNTER_SERVICE);
  ServiceManager responseCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.RESPONSE_COUNTER_SERVICE);
  ServiceManager requestCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.REQUEST_COUNTER_SERVICE);
  // Query for hit counts and verify it
  URL totalHitsURL = new URL(hitCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.HitCounterServiceHandler.HIT_COUNTER_SERVICE_PATH);
  HttpResponse response = HttpRequests.execute(HttpRequest.post(totalHitsURL).withBody("{\"url\":\"" + "/home.html" + "\"}").build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
  Assert.assertEquals(TOTAL_HITS_VALUE, response.getResponseBodyAsString());
  // query for total responses for a response code and verify it
  URL responseCodeURL = new URL(responseCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.ResponseCounterHandler.RESPONSE_COUNT_PATH + "/" + RESPONSE_CODE);
  HttpRequest request = HttpRequest.get(responseCodeURL).build();
  response = HttpRequests.execute(request);
  Assert.assertEquals(TOTAL_RESPONSE_VALUE, response.getResponseBodyAsString());
  // query to get partitions in the request count tpfs
  URL requestCountFilesetsURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_COUNTER_PARTITIONS_PATH);
  request = HttpRequest.get(requestCountFilesetsURL).build();
  response = HttpRequests.execute(request);
  TreeSet<String> partitions = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<TreeSet<String>>() {
  }.getType());
  Assert.assertEquals(1, partitions.size());
  String partition = partitions.iterator().next();
  // Query for the contents of the files in this partition and verify
  URL requestFilesetContentURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_CONTENT_PATH);
  response = HttpRequests.execute(HttpRequest.post(requestFilesetContentURL).withBody("{\"" + LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_PATH_HANDLER_KEY + "\":\"" + partition + "\"}").build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
  Map<String, Integer> responseMap = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<Map<String, Integer>>() {
  }.getType());
  Assert.assertTrue(responseMap.equals(TPFS_RESULT));
}
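getServiceManager(appManager, name) is a convenience helper of LogAnalysisAppTest that is not shown. A plausible sketch, using only calls that already appear in the snippets on this page:

private ServiceManager getServiceManager(ApplicationManager appManager, String serviceName) throws Exception {
  // start the named service and wait until it reports as running before returning it
  ServiceManager serviceManager = appManager.getServiceManager(serviceName).start();
  serviceManager.waitForStatus(true, 30, 1);
  return serviceManager;
}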
Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.
From the class WikipediaPipelineAppTest, method test.
@Test
@Category(XSlowTests.class)
public void test() throws Exception {
  WikipediaPipelineApp.WikipediaAppConfig appConfig = new WikipediaPipelineApp.WikipediaAppConfig();
  AppRequest<WikipediaPipelineApp.WikipediaAppConfig> appRequest = new AppRequest<>(ARTIFACT_SUMMARY, appConfig);
  ApplicationManager appManager = deployApplication(APP_ID, appRequest);
  // Set up input streams with test data
  createTestData();
  WorkflowManager workflowManager = appManager.getWorkflowManager(WikipediaPipelineWorkflow.NAME);
  // Test with the default threshold. The workflow should not proceed beyond the first condition.
  testWorkflow(workflowManager, appConfig, 1);
  // Test with a reduced threshold, so the workflow proceeds beyond the first predicate
  testWorkflow(workflowManager, appConfig, 2, 1);
  // Test K-Means
  appConfig = new WikipediaPipelineApp.WikipediaAppConfig("kmeans");
  appRequest = new AppRequest<>(ARTIFACT_SUMMARY, appConfig);
  appManager = deployApplication(APP_ID, appRequest);
  workflowManager = appManager.getWorkflowManager(WikipediaPipelineWorkflow.NAME);
  testWorkflow(workflowManager, appConfig, 3, 1);
}
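testWorkflow is where the actual verification happens and is not shown here. A minimal sketch of its shape, assuming the optional fourth argument is a threshold passed to the workflow as a runtime argument (the key "min.pages.threshold" is hypothetical) and that each call waits for the cumulative number of completed workflow runs:

private void testWorkflow(WorkflowManager workflowManager, WikipediaPipelineApp.WikipediaAppConfig config, int expectedRuns) throws Exception {
  testWorkflow(workflowManager, config, expectedRuns, null);
}

private void testWorkflow(WorkflowManager workflowManager, WikipediaPipelineApp.WikipediaAppConfig config, int expectedRuns, Integer threshold) throws Exception {
  Map<String, String> args = new HashMap<>();
  if (threshold != null) {
    // hypothetical runtime argument controlling the first condition's threshold
    args.put("min.pages.threshold", String.valueOf(threshold));
  }
  workflowManager.start(args);
  // wait until the total number of COMPLETED runs reaches expectedRuns
  workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedRuns, 10, TimeUnit.MINUTES);
}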