Example 1 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

From the class DataQualityAppTest, method testMeanContentLength.

@Test
public void testMeanContentLength() throws Exception {
    Map<String, Set<String>> testMap = new HashMap<>();
    Set<String> testSet = new HashSet<>();
    testSet.add("Mean");
    testMap.put("content_length", testSet);
    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "avg", testMap);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp2");
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
    ApplicationManager applicationManager = deployApplication(appId, appRequest);
    MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
    ServiceManager serviceManager = applicationManager.getServiceManager(DataQualityService.SERVICE_NAME).start();
    serviceManager.waitForStatus(true);
    /* Test for the aggregationsGetter handler */
    URL url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields/content_length/aggregations/Mean/timeseries");
    HttpResponse httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
    String response = httpResponse.getResponseBodyAsString();
    List<TimestampValue> tsValueListActual = GSON.fromJson(response, TOKEN_TYPE_LIST_TIMESTAMP_VALUE);
    TimestampValue firstTimestampValue = tsValueListActual.get(0);
    Assert.assertEquals(256.0, firstTimestampValue.getValue());
    serviceManager.stop();
    serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), Set (java.util.Set), HashSet (java.util.HashSet), MapReduceManager (co.cask.cdap.test.MapReduceManager), HashMap (java.util.HashMap), HttpResponse (co.cask.common.http.HttpResponse), URL (java.net.URL), AppRequest (co.cask.cdap.proto.artifact.AppRequest), ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary), ServiceManager (co.cask.cdap.test.ServiceManager), TimestampValue (co.cask.cdap.dq.TimestampValue), ApplicationId (co.cask.cdap.proto.id.ApplicationId), DataQualityApp (co.cask.cdap.dq.DataQualityApp), Test (org.junit.Test)
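Stripped to its essentials, the example above is the standard MapReduceManager test flow: deploy the application, start the named MapReduce program, and block until the run reaches a terminal state. A minimal sketch of just that skeleton, assuming a test class that extends co.cask.cdap.test.TestBase (MyApp is a placeholder application class):

    // Minimal sketch, assuming the surrounding test extends co.cask.cdap.test.TestBase.
    // MyApp is hypothetical; "FieldAggregator" is the MapReduce name from the example above.
    ApplicationManager appManager = deployApplication(MyApp.class);
    MapReduceManager mrManager = appManager.getMapReduceManager("FieldAggregator").start();
    // Fails the test if the run does not reach COMPLETED within the timeout.
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);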

Example 2 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

From the class DataQualityAppTest, method testTotals.

@Test
public void testTotals() throws Exception {
    Map<String, Set<String>> testMap = new HashMap<>();
    Set<String> testSet = new HashSet<>();
    testSet.add("DiscreteValuesHistogram");
    testMap.put("content_length", testSet);
    testMap.put("status", testSet);
    testMap.put("request_time", testSet);
    DataQualityApp.DataQualityConfig config = new DataQualityApp.DataQualityConfig(WORKFLOW_SCHEDULE_MINUTES, getStreamSource(), "histogram", testMap);
    ApplicationId appId = NamespaceId.DEFAULT.app("newApp3");
    AppRequest<DataQualityApp.DataQualityConfig> appRequest = new AppRequest<>(new ArtifactSummary(appArtifact.getArtifact(), appArtifact.getVersion()), config);
    ApplicationManager applicationManager = deployApplication(appId, appRequest);
    MapReduceManager mrManager = applicationManager.getMapReduceManager("FieldAggregator").start();
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
    Map<String, Integer> expectedMap = new HashMap<>();
    expectedMap.put("256", 3);
    /* Test for the aggregationsGetter handler */
    ServiceManager serviceManager = applicationManager.getServiceManager(DataQualityService.SERVICE_NAME).start();
    serviceManager.waitForStatus(true);
    URL url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields/content_length/aggregations/DiscreteValuesHistogram/totals");
    HttpResponse httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
    String response = httpResponse.getResponseBodyAsString();
    Map<String, Integer> histogramMap = GSON.fromJson(response, TOKEN_TYPE_MAP_STRING_INTEGER);
    Assert.assertEquals(expectedMap, histogramMap);
    /* Test for the fieldsGetter handler */
    url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields");
    httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
    response = httpResponse.getResponseBodyAsString();
    Set<FieldDetail> outputSet = GSON.fromJson(response, TOKEN_TYPE_SET_FIELD_DETAIL);
    Set<FieldDetail> expectedSet = new HashSet<>();
    AggregationTypeValue aggregationTypeValue = new AggregationTypeValue("DiscreteValuesHistogram", true);
    Set<AggregationTypeValue> aggregationTypeValuesList = Sets.newHashSet(aggregationTypeValue);
    expectedSet.add(new FieldDetail("content_length", aggregationTypeValuesList));
    expectedSet.add(new FieldDetail("request_time", aggregationTypeValuesList));
    expectedSet.add(new FieldDetail("status", aggregationTypeValuesList));
    Assert.assertEquals(expectedSet, outputSet);
    /* Test for the aggregationTypesGetter handler */
    url = new URL(serviceManager.getServiceURL(), "v1/sources/logStream/fields/content_length");
    httpResponse = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, httpResponse.getResponseCode());
    response = httpResponse.getResponseBodyAsString();
    List<AggregationTypeValue> expectedAggregationTypeValuesList = new ArrayList<>();
    List<AggregationTypeValue> outputAggregationTypeValuesList = GSON.fromJson(response, TOKEN_TYPE_LIST_AGGREGATION_TYPE_VALUES);
    expectedAggregationTypeValuesList.add(new AggregationTypeValue("DiscreteValuesHistogram", true));
    Assert.assertEquals(expectedAggregationTypeValuesList, outputAggregationTypeValuesList);
    serviceManager.stop();
    serviceManager.waitForRun(ProgramRunStatus.KILLED, 180, TimeUnit.SECONDS);
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), Set (java.util.Set), HashSet (java.util.HashSet), MapReduceManager (co.cask.cdap.test.MapReduceManager), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), URL (java.net.URL), ServiceManager (co.cask.cdap.test.ServiceManager), AggregationTypeValue (co.cask.cdap.dq.AggregationTypeValue), HttpResponse (co.cask.common.http.HttpResponse), FieldDetail (co.cask.cdap.dq.FieldDetail), AppRequest (co.cask.cdap.proto.artifact.AppRequest), ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary), ApplicationId (co.cask.cdap.proto.id.ApplicationId), DataQualityApp (co.cask.cdap.dq.DataQualityApp), Test (org.junit.Test)
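Not shown in the snippet are the GSON instance and the TOKEN_TYPE_* Type constants it deserializes with; the test class defines its own, but under the standard Gson TypeToken idiom (Gson's com.google.gson.reflect.TypeToken or Guava's com.google.common.reflect.TypeToken both work) the declarations would look roughly like this:

    // Hypothetical declarations for the deserialization constants used above;
    // the real test class defines its own, this is just the standard idiom.
    private static final Gson GSON = new Gson();
    private static final Type TOKEN_TYPE_MAP_STRING_INTEGER =
        new TypeToken<Map<String, Integer>>() { }.getType();
    private static final Type TOKEN_TYPE_SET_FIELD_DETAIL =
        new TypeToken<Set<FieldDetail>>() { }.getType();
    private static final Type TOKEN_TYPE_LIST_AGGREGATION_TYPE_VALUES =
        new TypeToken<List<AggregationTypeValue>>() { }.getType();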

Example 3 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

From the class DataCleansingMapReduceTest, method testPartitionConsuming.

@Test
public void testPartitionConsuming() throws Exception {
    ApplicationManager applicationManager = deployApplication(DataCleansing.class);
    ServiceManager serviceManager = applicationManager.getServiceManager(DataCleansingService.NAME).start();
    serviceManager.waitForStatus(true);
    URL serviceURL = serviceManager.getServiceURL();
    // write a set of records to one partition and run the DataCleansingMapReduce job on that one partition
    createPartition(serviceURL, RECORD_SET1);
    // before starting the MR, there are 0 invalid records and 0 valid records, according to metrics
    Assert.assertEquals(0, getValidityMetrics(true));
    Assert.assertEquals(0, getValidityMetrics(false));
    Long now = System.currentTimeMillis();
    ImmutableMap<String, String> args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(RECORD_SET1, true));
    compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(RECORD_SET1, false));
    // assert that some of the records have indeed been filtered
    Assert.assertNotEquals(filterRecords(RECORD_SET1, true), RECORD_SET1);
    Assert.assertNotEquals(filterRecords(RECORD_SET1, false), Collections.<String>emptySet());
    // verify this via metrics
    Assert.assertEquals(1, getValidityMetrics(true));
    Assert.assertEquals(1, getValidityMetrics(false));
    // create two additional partitions
    createPartition(serviceURL, RECORD_SET2);
    createPartition(serviceURL, RECORD_SET3);
    // running the MapReduce job now processes these two new partitions (RECORD_SET2 and
    // RECORD_SET3) and creates a new partition with the output
    now = System.currentTimeMillis();
    args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
    ImmutableSet<String> recordSets2and3 = ImmutableSet.<String>builder().addAll(RECORD_SET2).addAll(RECORD_SET3).build();
    compareData(now, DataCleansing.CLEAN_RECORDS, filterRecords(recordSets2and3, true));
    compareData(now, DataCleansing.INVALID_RECORDS, filterRecords(recordSets2and3, false));
    // verify this via metrics
    Assert.assertEquals(1, getValidityMetrics(true));
    Assert.assertEquals(5, getValidityMetrics(false));
    // running the MapReduce job without adding new partitions creates no additional output
    now = System.currentTimeMillis();
    args = ImmutableMap.of(DataCleansingMapReduce.OUTPUT_PARTITION_KEY, now.toString(), DataCleansingMapReduce.SCHEMA_KEY, schemaJson);
    mapReduceManager = applicationManager.getMapReduceManager(DataCleansingMapReduce.NAME).start(args);
    mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 3, 5, TimeUnit.MINUTES);
    compareData(now, DataCleansing.CLEAN_RECORDS, Collections.<String>emptySet());
    compareData(now, DataCleansing.INVALID_RECORDS, Collections.<String>emptySet());
    // verify that the records were properly partitioned on their zip
    DataSetManager<PartitionedFileSet> cleanRecords = getDataset(DataCleansing.CLEAN_RECORDS);
    PartitionFilter filter = PartitionFilter.builder().addValueCondition("zip", 84125).build();
    Assert.assertEquals(ImmutableSet.of(RECORD1, RECORD4, RECORD6), getDataFromFilter(cleanRecords.get(), filter));
    filter = PartitionFilter.builder().addValueCondition("zip", 84126).build();
    Assert.assertEquals(ImmutableSet.of(RECORD3, RECORD5), getDataFromFilter(cleanRecords.get(), filter));
}
Also used: ApplicationManager (co.cask.cdap.test.ApplicationManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter), ServiceManager (co.cask.cdap.test.ServiceManager), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), URL (java.net.URL), Test (org.junit.Test)
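The getDataFromFilter call goes to a private helper of the test class that is not included here. Assuming it simply gathers the records stored in the partitions matching the filter, a rough sketch on top of the standard PartitionedFileSet API might be:

    // Hypothetical sketch of a helper like getDataFromFilter: fetch the partitions
    // matching the filter, then read each partition's files (record parsing elided).
    Set<PartitionDetail> partitions = cleanRecords.get().getPartitions(filter);
    for (PartitionDetail partition : partitions) {
        Location partitionDir = partition.getLocation();
        // ... read and collect the records stored under partitionDir
    }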

Example 4 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

From the class LogAnalysisAppTest, method test.

@Test
public void test() throws Exception {
    // Deploy the App
    ApplicationManager appManager = deployApplication(LogAnalysisApp.class);
    // Send stream events to the Stream
    StreamManager streamManager = getStreamManager(LogAnalysisApp.LOG_STREAM);
    streamManager.send(LOG_1);
    streamManager.send(LOG_2);
    streamManager.send(LOG_3);
    // run the spark program
    SparkManager sparkManager = appManager.getSparkManager(LogAnalysisApp.ResponseCounterSpark.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
    // run the mapreduce job
    MapReduceManager mapReduceManager = appManager.getMapReduceManager(HitCounterProgram.class.getSimpleName()).start();
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    // start and wait for services
    ServiceManager hitCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.HIT_COUNTER_SERVICE);
    ServiceManager responseCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.RESPONSE_COUNTER_SERVICE);
    ServiceManager requestCounterServiceManager = getServiceManager(appManager, LogAnalysisApp.REQUEST_COUNTER_SERVICE);
    // Query for hit counts and verify them
    URL totalHitsURL = new URL(hitCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.HitCounterServiceHandler.HIT_COUNTER_SERVICE_PATH);
    HttpResponse response = HttpRequests.execute(HttpRequest.post(totalHitsURL).withBody("{\"url\":\"" + "/home.html" + "\"}").build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    Assert.assertEquals(TOTAL_HITS_VALUE, response.getResponseBodyAsString());
    // query for total responses for a response code and verify it
    URL responseCodeURL = new URL(responseCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.ResponseCounterHandler.RESPONSE_COUNT_PATH + "/" + RESPONSE_CODE);
    HttpRequest request = HttpRequest.get(responseCodeURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(TOTAL_RESPONSE_VALUE, response.getResponseBodyAsString());
    // query to get partitions in the request count tpfs
    URL requestCountFilesetsURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_COUNTER_PARTITIONS_PATH);
    request = HttpRequest.get(requestCountFilesetsURL).build();
    response = HttpRequests.execute(request);
    TreeSet<String> partitions = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<TreeSet<String>>() {
    }.getType());
    Assert.assertEquals(1, partitions.size());
    String partition = partitions.iterator().next();
    // Query for the contents of the files in this partition and verify them
    URL requestFilesetContentURL = new URL(requestCounterServiceManager.getServiceURL(15, TimeUnit.SECONDS), LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_CONTENT_PATH);
    response = HttpRequests.execute(HttpRequest.post(requestFilesetContentURL).withBody("{\"" + LogAnalysisApp.RequestCounterHandler.REQUEST_FILE_PATH_HANDLER_KEY + "\":\"" + partition + "\"}").build());
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    Map<String, Integer> responseMap = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<Map<String, Integer>>() {
    }.getType());
    Assert.assertEquals(TPFS_RESULT, responseMap);
}
Also used: HttpRequest (co.cask.common.http.HttpRequest), ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), HttpResponse (co.cask.common.http.HttpResponse), URL (java.net.URL), StreamManager (co.cask.cdap.test.StreamManager), ServiceManager (co.cask.cdap.test.ServiceManager), TypeToken (com.google.common.reflect.TypeToken), Test (org.junit.Test)
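The getServiceManager(appManager, name) calls go to a helper defined in the test class rather than a TestBase method. Judging from how it is used, it presumably starts the named service and waits until the service is up; a plausible, hypothetical reconstruction:

    // Hypothetical reconstruction of the test's getServiceManager helper:
    // start the named service and block until it reports as running.
    private ServiceManager getServiceManager(ApplicationManager appManager, String serviceName)
            throws Exception {
        ServiceManager serviceManager = appManager.getServiceManager(serviceName).start();
        serviceManager.waitForStatus(true);
        return serviceManager;
    }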

Example 5 with MapReduceManager

Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata.

From the class SparkPageRankAppTest, method test.

@Test
public void test() throws Exception {
    // Deploy the SparkPageRankApp
    ApplicationManager appManager = deployApplication(SparkPageRankApp.class);
    // Send stream events to the Stream
    StreamManager streamManager = getStreamManager(SparkPageRankApp.BACKLINK_URL_STREAM);
    streamManager.send(Joiner.on(" ").join(URL_1, URL_2));
    streamManager.send(Joiner.on(" ").join(URL_1, URL_3));
    streamManager.send(Joiner.on(" ").join(URL_2, URL_1));
    streamManager.send(Joiner.on(" ").join(URL_3, URL_1));
    // Start service
    ServiceManager serviceManager = appManager.getServiceManager(SparkPageRankApp.SERVICE_HANDLERS).start();
    // Wait for service to start since the Spark program needs it
    serviceManager.waitForStatus(true);
    // Start the SparkPageRankProgram
    SparkManager sparkManager = appManager.getSparkManager(SparkPageRankApp.PageRankSpark.class.getSimpleName()).start();
    sparkManager.waitForFinish(60, TimeUnit.SECONDS);
    // Run RanksCounter, which counts the number of pages per page rank
    MapReduceManager mapReduceManager = appManager.getMapReduceManager(SparkPageRankApp.RanksCounter.class.getSimpleName()).start();
    mapReduceManager.waitForFinish(3, TimeUnit.MINUTES);
    // Query for the rank and verify it
    URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), SparkPageRankApp.SparkPageRankServiceHandler.RANKS_PATH);
    HttpRequest request = HttpRequest.post(url).withBody(("{\"" + SparkPageRankApp.SparkPageRankServiceHandler.URL_KEY + "\":\"" + URL_1 + "\"}")).build();
    HttpResponse response = HttpRequests.execute(request);
    Assert.assertEquals(HttpURLConnection.HTTP_OK, response.getResponseCode());
    Assert.assertEquals(RANK, response.getResponseBodyAsString());
    // Request total pages for a page rank and verify it
    url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), SparkPageRankApp.SparkPageRankServiceHandler.TOTAL_PAGES_PATH + "/" + RANK);
    response = HttpRequests.execute(HttpRequest.get(url).build());
    Assert.assertEquals(TOTAL_PAGES, response.getResponseBodyAsString());
}
Also used: HttpRequest (co.cask.common.http.HttpRequest), ApplicationManager (co.cask.cdap.test.ApplicationManager), SparkManager (co.cask.cdap.test.SparkManager), MapReduceManager (co.cask.cdap.test.MapReduceManager), StreamManager (co.cask.cdap.test.StreamManager), ServiceManager (co.cask.cdap.test.ServiceManager), HttpResponse (co.cask.common.http.HttpResponse), URL (java.net.URL), Test (org.junit.Test)
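Note that this example waits with waitForFinish(timeout, unit) rather than the waitForRun(ProgramRunStatus.COMPLETED, ...) calls seen in the earlier examples; waitForFinish appears to be the older ProgramManager wait API and only blocks until the program stops, without asserting its final run status. If that assertion matters, the earlier style applies directly:

    // Sketch: assert the terminal status explicitly, as the earlier examples do.
    mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);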

Aggregations

MapReduceManager (co.cask.cdap.test.MapReduceManager): 24 uses
ApplicationManager (co.cask.cdap.test.ApplicationManager): 22 uses
Test (org.junit.Test): 21 uses
ServiceManager (co.cask.cdap.test.ServiceManager): 13 uses
StreamManager (co.cask.cdap.test.StreamManager): 9 uses
URL (java.net.URL): 9 uses
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 8 uses
SparkManager (co.cask.cdap.test.SparkManager): 5 uses
HttpResponse (co.cask.common.http.HttpResponse): 5 uses
Location (org.apache.twill.filesystem.Location): 5 uses
ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary): 4 uses
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 4 uses
FlowManager (co.cask.cdap.test.FlowManager): 4 uses
HashMap (java.util.HashMap): 4 uses
HashSet (java.util.HashSet): 4 uses
Set (java.util.Set): 4 uses
DataQualityApp (co.cask.cdap.dq.DataQualityApp): 3 uses
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 3 uses
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 3 uses
HttpRequest (co.cask.common.http.HttpRequest): 3 uses