use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class DataQualityAppTest method testMeanContentLength.
@Test
public void testMeanContentLength() throws Exception {
  Map<String, Set<String>> testMap = new HashMap<>();
  Set<String> testSet = new HashSet<>();
  testSet.add("Mean");
  testMap.put("content_length", testSet);
  DataQualityApp.DataQualityConfig config =
    new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "avg", testMap);
  ApplicationId appId = NamespaceId.DEFAULT.app("newApp2");
  AppRequest<DataQualityApp.DataQualityConfig> appRequest =
    new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
  ApplicationManager applicationManager = deployApplication(appId, appRequest);
  // Run the FieldAggregator MapReduce program and wait for it to complete
  MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
  ServiceManager serviceManager = applicationManager.getServiceManager(DataQualityService.SERVICE_NAME).start();
  serviceManager.waitForStatus(true);
  /* Test for the aggregationsGetter handler */
  URL url = new URL(serviceManager.getServiceURL(),
                    "v1/sources/logStream/fields/content_length/aggregations/Mean/timeseries");
  HttpResponse httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
  String response = httpResponse.getResponseBodyAsString();
  List<TimestampValue> tsValueListActual = GSON.fromJson(response, TOKEN_TYPE_LIST_TIMESTAMP_VALUE);
  TimestampValue firstTimestampValue = tsValueListActual.get(0);
  Assert.assertEquals(256.0, firstTimestampValue.getValue());
  serviceManager.stop();
  serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
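Every test on this page repeats the same fetch, assert-200, parse-JSON sequence against a service endpoint. A minimal helper sketch that factors it out, assuming only the co.cask.cdap.common.http classes already used above; the class and method names here are illustrative and not part of the cdap project:

// Illustrative helper (not part of cdap): factors out the GET -> assert 200 ->
// parse-JSON pattern that the tests on this page repeat.
import java.lang.reflect.Type;
import java.net.HttpURLConnection;
import java.net.URL;
import co.cask.cdap.common.http.HttpRequest;
import co.cask.cdap.common.http.HttpRequests;
import co.cask.cdap.common.http.HttpResponse;
import com.google.gson.Gson;
import org.junit.Assert;

public final class ServiceAssertions {
  private static final Gson GSON = new Gson();

  private ServiceAssertions() { }

  // Executes a GET against the given service URL, asserts HTTP 200, and
  // deserializes the JSON body into the requested type.
  public static <T> T getAndParse(URL url, Type type) throws Exception {
    HttpResponse response = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    return GSON.fromJson(response.getResponseBodyAsString(), type);
  }
}

With such a helper, the timeseries check above reduces to a single call: List<TimestampValue> tsValues = ServiceAssertions.getAndParse(url, TOKEN_TYPE_LIST_TIMESTAMP_VALUE);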
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class DataQualityAppTest method testTotals.
@Test
public void testTotals() throws Exception {
  Map<String, Set<String>> testMap = new HashMap<>();
  Set<String> testSet = new HashSet<>();
  testSet.add("DiscreteValuesHistogram");
  // The same aggregation set instance is shared by all three fields
  testMap.put("content_length", testSet);
  testMap.put("status", testSet);
  testMap.put("request_time", testSet);
  DataQualityApp.DataQualityConfig config =
    new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "histogram", testMap);
  ApplicationId appId = NamespaceId.DEFAULT.app("newApp3");
  AppRequest<DataQualityApp.DataQualityConfig> appRequest =
    new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
  ApplicationManager applicationManager = deployApplication(appId, appRequest);
  MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
  Map<String, Integer> expectedMap = new HashMap<>();
  expectedMap.put("256", 3);
  /* Test for the aggregationsGetter handler */
  ServiceManager serviceManager = applicationManager.getServiceManager(DataQualityService.SERVICE_NAME).start();
  serviceManager.waitForStatus(true);
  URL url = new URL(serviceManager.getServiceURL(),
                    "v1/sources/logStream/fields/content_length/aggregations/DiscreteValuesHistogram/totals");
  HttpResponse httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
  String response = httpResponse.getResponseBodyAsString();
  Map<String, Integer> histogramMap = GSON.fromJson(response, TOKEN_TYPE_MAP_STRING_INTEGER);
  Assert.assertEquals(expectedMap, histogramMap);
  /* Test for the fieldsGetter handler */
  url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields");
  httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
  response = httpResponse.getResponseBodyAsString();
  Set<FieldDetail> outputSet = GSON.fromJson(response, TOKEN_TYPE_SET_FIELD_DETAIL);
  Set<FieldDetail> expectedSet = new HashSet<>();
  AggregationTypeValue aggregationTypeValue = new AggregationTypeValue("DiscreteValuesHistogram", true);
  Set<AggregationTypeValue> aggregationTypeValues = Sets.newHashSet(aggregationTypeValue);
  expectedSet.add(new FieldDetail("content_length", aggregationTypeValues));
  expectedSet.add(new FieldDetail("request_time", aggregationTypeValues));
  expectedSet.add(new FieldDetail("status", aggregationTypeValues));
  Assert.assertEquals(expectedSet, outputSet);
  /* Test for the aggregationTypesGetter handler */
  url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields/content_length");
  httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
  response = httpResponse.getResponseBodyAsString();
  List<AggregationTypeValue> expectedAggregationTypeValuesList = new ArrayList<>();
  List<AggregationTypeValue> outputAggregationTypeValuesList = GSON.fromJson(response, TOKEN_TYPE_LIST_AGGREGATION_TYPE_VALUES);
  expectedAggregationTypeValuesList.add(new AggregationTypeValue("DiscreteValuesHistogram", true));
  Assert.assertEquals(expectedAggregationTypeValuesList, outputAggregationTypeValuesList);
  serviceManager.stop();
  serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
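The GSON type constants referenced in these two tests (TOKEN_TYPE_MAP_STRING_INTEGER and friends) are declared elsewhere in the test class. A plausible sketch of those declarations, inferred from the fromJson call sites above; the actual declarations in DataQualityAppTest may differ:

// Sketch of the type-token constants used above, inferred from their call
// sites. Imports for the data-quality model classes (TimestampValue,
// FieldDetail, AggregationTypeValue) are omitted here.
import java.lang.reflect.Type;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.gson.reflect.TypeToken;

final class DataQualityTypeTokens {
  static final Type TOKEN_TYPE_LIST_TIMESTAMP_VALUE =
    new TypeToken<List<TimestampValue>>() { }.getType();
  static final Type TOKEN_TYPE_MAP_STRING_INTEGER =
    new TypeToken<Map<String, Integer>>() { }.getType();
  static final Type TOKEN_TYPE_SET_FIELD_DETAIL =
    new TypeToken<Set<FieldDetail>>() { }.getType();
  static final Type TOKEN_TYPE_LIST_AGGREGATION_TYPE_VALUES =
    new TypeToken<List<AggregationTypeValue>>() { }.getType();

  private DataQualityTypeTokens() { }
}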
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class DataCleansingMapReduceTest method testPartitionConsuming.
@Test
public void testPartitionConsuming() throws Exception {
  ApplicationManager applicationManager = deployApplication(DataCleansing.class);
  ServiceManager serviceManager = applicationManager.getServiceManager(DataCleansingService.NAME).start();
  serviceManager.waitForStatus(true);
  URL serviceURL = serviceManager.getServiceURL();
  // write a set of records to one partition and run the DataCleansingMapReduce job on that one partition
  createPartition(serviceURL, RECORD_SET1);
  // before starting the MR, there are 0 invalid records and 0 valid records, according to metrics
  Assert.assertEquals(0, getValidityMetrics(true));
  Assert.assertEquals(0, getValidityMetrics(false));
  Long now = System.currentTimeMillis();
  ImmutableMap<String, String> args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(),
                                                      DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(RECORD_SET1, true));
  compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(RECORD_SET1, false));
  // assert that some of the records have indeed been filtered
  Assert.assertNotEquals(filterRecords(RECORD_SET1, true), RECORD_SET1);
  Assert.assertNotEquals(filterRecords(RECORD_SET1, false), Collections.<String>emptySet());
  // verify this via metrics
  Assert.assertEquals(1, getValidityMetrics(true));
  Assert.assertEquals(1, getValidityMetrics(false));
  // create two additional partitions
  createPartition(serviceURL, RECORD_SET2);
  createPartition(serviceURL, RECORD_SET3);
  // running the MapReduce job now processes the two new partitions (RECORD_SET2 and RECORD_SET3)
  // and creates a new partition with the output
  now = System.currentTimeMillis();
  args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(),
                         DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
  ImmutableSet<String> recordSets2and3 = ImmutableSet.<String>builder().addAll(RECORD_SET2).addAll(RECORD_SET3).build();
  compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(recordSets2and3, true));
  compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(recordSets2and3, false));
  // verify this via metrics
  Assert.assertEquals(1, getValidityMetrics(true));
  Assert.assertEquals(5, getValidityMetrics(false));
  // running the MapReduce job without adding new partitions creates no additional output
  now = System.currentTimeMillis();
  args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(),
                         DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
  mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
  mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
  compareData(now, DataCleansing.CLEAN_RECORDS, Collections.<String>emptySet());
  compareData(now, DataCleansing.INVALID_RECORDS, Collections.<String>emptySet());
  // verify that the records were properly partitioned on their zip
  DataSetManager<PartitionedFileSet> cleanRecords = getDataset(DataCleansing.CLEAN_RECORDS);
  PartitionFilter filter = PartitionFilter.builder().addValueCondition("zip", 84125).build();
  Assert.assertEquals(ImmutableSet.of(RECORD1, RECORD4, RECORD6), getDataFromFilter(cleanRecords.get(), filter));
  filter = PartitionFilter.builder().addValueCondition("zip", 84126).build();
  Assert.assertEquals(ImmutableSet.of(RECORD3, RECORD5), getDataFromFilter(cleanRecords.get(), filter));
}
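The getDataFromFilter helper used in the final assertions is defined elsewhere in DataCleansingMapReduceTest and is not shown in this excerpt. A plausible sketch of what it does, assuming it collects every line of every file in each partition that matches the filter (the actual helper may differ, e.g. in how it skips marker files):

// Plausible sketch of the getDataFromFilter helper referenced above.
// Lives inside the test class; the actual implementation is not shown here.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionFilter;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;
import org.apache.twill.filesystem.Location;

private static Set<String> getDataFromFilter(PartitionedFileSet fileSet, PartitionFilter filter) throws IOException {
  Set<String> records = new HashSet<>();
  // getPartitions(filter) returns every partition whose key matches the filter
  for (PartitionDetail partition : fileSet.getPartitions(filter)) {
    for (Location file : partition.getLocation().list()) {
      try (BufferedReader reader =
             new BufferedReader(new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
          records.add(line);
        }
      }
    }
  }
  return records;
}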
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class LogAnalysisAppTest method test.
@Test
public void test() throws Exception {
  // Deploy the App
  ApplicationManager appManager = deployApplication(LogAnalysisApp.class);
  // Send stream events to the Stream
  StreamManager streamManager = getStreamManager(LogAnalysisApp.LOG_STREAM);
  streamManager.send(LOG_1);
  streamManager.send(LOG_2);
  streamManager.send(LOG_3);
  // run the Spark program
  SparkManager sparkManager = appManager.getSparkManager(LogAnalysisApp.ResponseCounterSpark.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  // run the MapReduce job
  MapReduceManager mapReduceManager = appManager.getMapReduceManager(HitCounterProgram.class.getSimpleName()).start();
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
  // start and wait for the services
  ServiceManager hitCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.HIT_COUNTER_SERVICE);
  ServiceManager responseCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.RESPONSE_COUNTER_SERVICE);
  ServiceManager requestCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.REQUEST_COUNTER_SERVICE);
  // query for hit counts and verify them
  URL totalHitsURL = new URL(hitCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                             LogAnalysisApp.HitCounterServiceHandler.HIT_COUNTER_SERVICE_PATH);
  HttpResponse response = HttpRequests.execute(HttpRequest.post(totalHitsURL)
                                                 .withBody("{\"url\":\"/home.html\"}").build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
  Assert.assertEquals(TOTAL_HITS_VALUE, response.getResponseBodyAsString());
  // query for the total responses for a response code and verify it
  URL responseCodeURL = new URL(responseCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                                LogAnalysisApp.ResponseCounterHandler.RESPONSE_COUNT_PATH + "/" + RESPONSE_CODE);
  HttpRequest request = HttpRequest.get(responseCodeURL).build();
  response = HttpRequests.execute(request);
  Assert.assertEquals(TOTAL_RESPONSE_VALUE, response.getResponseBodyAsString());
  // query for the partitions in the request-count TimePartitionedFileSet
  URL requestCountFilesetsURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                                        LogAnalysisApp.RequestCounterHandler.REQUEST_COUNTER_PARTITIONS_PATH);
  request = HttpRequest.get(requestCountFilesetsURL).build();
  response = HttpRequests.execute(request);
  TreeSet<String> partitions = GSON.fromJson(response.getResponseBodyAsString(),
                                             new TypeToken<TreeSet<String>>() { }.getType());
  Assert.assertEquals(1, partitions.size());
  String partition = partitions.iterator().next();
  // query for the contents of the files in this partition and verify them
  URL requestFilesetContentURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                                         LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_CONTENT_PATH);
  response = HttpRequests.execute(HttpRequest.post(requestFilesetContentURL)
                                    .withBody("{\"" + LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_PATH_HANDLER_KEY
                                                + "\":\"" + partition + "\"}").build());
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
  Map<String, Integer> responseMap = GSON.fromJson(response.getResponseBodyAsString(),
                                                   new TypeToken<Map<String, Integer>>() { }.getType());
  Assert.assertEquals(TPFS_RESULT, responseMap);
}
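The getServiceManager helper used above is defined elsewhere in LogAnalysisAppTest. A plausible sketch, assuming it simply starts the service and blocks until it reports running (the actual helper may differ):

// Plausible sketch of the getServiceManager helper used above; assumed
// behavior: start the named service and wait until it is running.
private ServiceManager getServiceManager(ApplicationManager appManager, String serviceName) throws Exception {
  ServiceManager serviceManager = appManager.getServiceManager(serviceName).start();
  serviceManager.waitForStatus(true);
  return serviceManager;
}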
use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.
the class SparkPageRankAppTest method test.
@Test
public void test() throws Exception {
  // Deploy the SparkPageRankApp
  ApplicationManager appManager = deployApplication(SparkPageRankApp.class);
  // Send stream events to the Stream
  StreamManager streamManager = getStreamManager(SparkPageRankApp.BACKLINK_URL_STREAM);
  streamManager.send(Joiner.on(" ").join(URL_1, URL_2));
  streamManager.send(Joiner.on(" ").join(URL_1, URL_3));
  streamManager.send(Joiner.on(" ").join(URL_2, URL_1));
  streamManager.send(Joiner.on(" ").join(URL_3, URL_1));
  // Start the service
  ServiceManager serviceManager = appManager.getServiceManager(SparkPageRankApp.SERVICE_HANDLERS).start();
  // Wait for the service to start, since the Spark program needs it
  serviceManager.waitForStatus(true);
  // Start the SparkPageRankProgram
  SparkManager sparkManager = appManager.getSparkManager(SparkPageRankApp.PageRankSpark.class.getSimpleName()).start();
  sparkManager.waitForFinish(60, TimeUnit.SECONDS);
  // Run RanksCounter, which counts the number of pages per page rank
  MapReduceManager mapReduceManager = appManager.getMapReduceManager(SparkPageRankApp.RanksCounter.class.getSimpleName()).start();
  mapReduceManager.waitForFinish(3, TimeUnit.MINUTES);
  // Query for the rank of a URL and verify it
  URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS),
                    SparkPageRankApp.SparkPageRankServiceHandler.RANKS_PATH);
  HttpRequest request = HttpRequest.post(url)
    .withBody("{\"" + SparkPageRankApp.SparkPageRankServiceHandler.URL_KEY + "\":\"" + URL_1 + "\"}").build();
  HttpResponse response = HttpRequests.execute(request);
  Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
  Assert.assertEquals(RANK, response.getResponseBodyAsString());
  // Request the total pages for a page rank and verify it
  url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS),
                SparkPageRankApp.SparkPageRankServiceHandler.TOTAL_PAGES_PATH + "/" + RANK);
  response = HttpRequests.execute(HttpRequest.get(url).build());
  Assert.assertEquals(TOTAL_PAGES, response.getResponseBodyAsString());
}
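Unlike the earlier examples, this test waits with waitForFinish rather than waitForRun, so it does not check the terminal run status. Assuming both programs are expected to complete normally, the equivalent waits in the style of the other tests on this page would be:

// Equivalent waits with an explicit terminal status, as in the other tests
// above (sketch; assumes both programs are expected to complete normally)
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);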