Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata: class TestFrameworkTestRun, method testCrossNSMapperDatasetAccess.
@Category(SlowTests.class)
@Test
public void testCrossNSMapperDatasetAccess() throws Exception {
  // Create an input and an output namespace, each with its own key-value table
  NamespaceMeta inputNS = new NamespaceMeta.Builder().setName("inputNS").build();
  NamespaceMeta outputNS = new NamespaceMeta.Builder().setName("outputNS").build();
  getNamespaceAdmin().create(inputNS);
  getNamespaceAdmin().create(outputNS);
  addDatasetInstance(inputNS.getNamespaceId().dataset("table1"), "keyValueTable");
  addDatasetInstance(outputNS.getNamespaceId().dataset("table2"), "keyValueTable");
  // Seed the input table
  DataSetManager<KeyValueTable> tableManager = getDataset(inputNS.getNamespaceId().dataset("table1"));
  KeyValueTable inputTable = tableManager.get();
  inputTable.write("hello", "world");
  tableManager.flush();
  // Run the MapReduce program with cross-namespace dataset arguments
  ApplicationManager appManager = deployApplication(DatasetCrossNSAccessWithMAPApp.class);
  Map<String, String> argsForMR = ImmutableMap.of(
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS, inputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME, "table1",
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS, outputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME, "table2");
  MapReduceManager mrManager = appManager.getMapReduceManager(DatasetCrossNSAccessWithMAPApp.MAPREDUCE_PROGRAM)
    .start(argsForMR);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  appManager.stopAll();
  // Verify the output written to the table in the output namespace
  DataSetManager<KeyValueTable> outTableManager = getDataset(outputNS.getNamespaceId().dataset("table2"));
  verifyMapperJobOutput(DatasetCrossNSAccessWithMAPApp.class, outTableManager);
}
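The snippet creates the inputNS and outputNS namespaces but does not show their removal. In a shared test environment one would typically delete them once the assertions pass; a minimal sketch continuing the snippet above, assuming the NamespaceAdmin returned by getNamespaceAdmin() exposes a delete(NamespaceId) method as in other CDAP test utilities:

  // Hedged sketch: clean up the namespaces created above.
  // Assumes NamespaceAdmin.delete(NamespaceId) is available; the original test may handle cleanup elsewhere.
  getNamespaceAdmin().delete(inputNS.getNamespaceId());
  getNamespaceAdmin().delete(outputNS.getNamespaceId());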
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata: class TestFrameworkTestRun, method testClusterName.
@Test
public void testClusterName() throws Exception {
  String clusterName = getConfiguration().get(Constants.CLUSTER_NAME);
  ApplicationManager appManager = deployApplication(ClusterNameTestApp.class);
  final DataSetManager<KeyValueTable> datasetManager = getDataset(ClusterNameTestApp.CLUSTER_NAME_TABLE);
  final KeyValueTable clusterNameTable = datasetManager.get();
  // A callable for reading the cluster name from the ClusterNameTable.
  // It is used for the Tasks.waitFor calls below.
  final AtomicReference<String> key = new AtomicReference<>();
  Callable<String> readClusterName = new Callable<String>() {
    @Nullable
    @Override
    public String call() throws Exception {
      datasetManager.flush();
      byte[] bytes = clusterNameTable.read(key.get());
      return bytes == null ? null : new String(bytes, StandardCharsets.UTF_8);
    }
  };
  // Service
  ServiceManager serviceManager = appManager
    .getServiceManager(ClusterNameTestApp.ClusterNameServiceHandler.class.getSimpleName()).start();
  Assert.assertEquals(clusterName,
                      callServiceGet(serviceManager.getServiceURL(10, TimeUnit.SECONDS), "clusterName"));
  serviceManager.stop();
  // Worker
  WorkerManager workerManager = appManager
    .getWorkerManager(ClusterNameTestApp.ClusterNameWorker.class.getSimpleName()).start();
  key.set("worker.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  // The worker will stop by itself. No need to call stop
  workerManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.SECONDS);
  // Flow
  FlowManager flowManager = appManager
    .getFlowManager(ClusterNameTestApp.ClusterNameFlow.class.getSimpleName()).start();
  key.set("flow.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  flowManager.stop();
  // MapReduce
  // Setup the input file used by MR
  Location location = this.<FileSet>getDataset(ClusterNameTestApp.INPUT_FILE_SET).get().getLocation("input");
  try (PrintStream printer = new PrintStream(location.getOutputStream(), true, "UTF-8")) {
    for (int i = 0; i < 10; i++) {
      printer.println("Hello World " + i);
    }
  }
  // Setup input and output dataset arguments
  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "input");
  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "output");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, ClusterNameTestApp.INPUT_FILE_SET, inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, ClusterNameTestApp.OUTPUT_FILE_SET, outputArgs));
  MapReduceManager mrManager = appManager
    .getMapReduceManager(ClusterNameTestApp.ClusterNameMapReduce.class.getSimpleName()).start(args);
  key.set("mr.client.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set("mapper.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set("reducer.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  // Spark
  SparkManager sparkManager = appManager
    .getSparkManager(ClusterNameTestApp.ClusterNameSpark.class.getSimpleName()).start();
  key.set("spark.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  // Workflow
  // Cleanup the output path for the MR job in the workflow first
  this.<FileSet>getDataset(ClusterNameTestApp.OUTPUT_FILE_SET).get().getLocation("output").delete(true);
  args = RuntimeArguments.addScope(Scope.MAPREDUCE,
                                   ClusterNameTestApp.ClusterNameMapReduce.class.getSimpleName(), args);
  WorkflowManager workflowManager = appManager
    .getWorkflowManager(ClusterNameTestApp.ClusterNameWorkflow.class.getSimpleName()).start(args);
  String prefix = ClusterNameTestApp.ClusterNameWorkflow.class.getSimpleName() + ".";
  key.set(prefix + "mr.client.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set(prefix + "mapper.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set(prefix + "reducer.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set(prefix + "spark.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  key.set(prefix + "action.cluster.name");
  Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
}
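The MapReduce and Workflow runs above are driven by scoped runtime arguments: FileSetArguments sets plain input/output path keys, and RuntimeArguments.addScope prefixes them so they only apply to one dataset (or, later, to one program in the workflow). A minimal sketch of what that composition is expected to produce, assuming the usual "<scope>.<name>.<key>" prefixing; the dataset name and the exact key strings here are illustrative, not taken from the source:

  // Hedged sketch of how the scoped arguments above expand.
  // The "<scope>.<name>.<key>" layout is an assumption based on how the test composes arguments.
  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "input");   // sets a plain input-path key (exact key name may differ)
  Map<String, String> scoped = RuntimeArguments.addScope(Scope.DATASET, "myFileSet", inputArgs);
  // scoped now holds something like {"dataset.myFileSet.input.paths": "input"},
  // so when passed to start(...) it only affects the dataset named "myFileSet".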
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata: class TestFrameworkTestRun, method testApp.
// todo: passing stream name as a workaround for not cleaning up streams during reset()
private void testApp(Class<? extends Application> app, String streamName) throws Exception {
  ApplicationManager applicationManager = deployApplication(app);
  FlowManager flowManager = applicationManager.getFlowManager("WordCountFlow").start();
  // Send some inputs to the stream
  StreamManager streamManager = getStreamManager(streamName);
  for (int i = 0; i < 100; i++) {
    streamManager.send(ImmutableMap.of("title", "title " + i), "testing message " + i);
  }
  // Check the flowlet metrics
  RuntimeMetrics flowletMetrics = flowManager.getFlowletMetrics("CountByField");
  flowletMetrics.waitForProcessed(500, 10, TimeUnit.SECONDS);
  Assert.assertEquals(0L, flowletMetrics.getException());
  // Query the result
  ServiceManager serviceManager = applicationManager.getServiceManager("WordFrequency").start();
  serviceManager.waitForStatus(true, 2, 1);
  // Verify the query result
  Type resultType = new TypeToken<Map<String, Long>>() { }.getType();
  Map<String, Long> result = new Gson().fromJson(
    callServiceGet(serviceManager.getServiceURL(), "wordfreq/" + streamName + ":testing"), resultType);
  Assert.assertNotNull(result);
  Assert.assertEquals(100L, result.get(streamName + ":testing").longValue());
  // Check the service metrics
  RuntimeMetrics serviceMetrics = serviceManager.getMetrics();
  serviceMetrics.waitForProcessed(1, 5, TimeUnit.SECONDS);
  Assert.assertEquals(0L, serviceMetrics.getException());
  // Run the MapReduce job
  MapReduceManager mrManager = applicationManager.getMapReduceManager("countTotal").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 1800L, TimeUnit.SECONDS);
  long totalCount = Long.valueOf(callServiceGet(serviceManager.getServiceURL(), "total"));
  // Every event has 5 tokens
  Assert.assertEquals(5 * 100L, totalCount);
  // Run MapReduce directly from the stream
  mrManager = applicationManager.getMapReduceManager("countFromStream").start();
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 120L, TimeUnit.SECONDS);
  totalCount = Long.valueOf(callServiceGet(serviceManager.getServiceURL(), "stream_total"));
  // The stream MapReduce only consumes the event body, not the header
  Assert.assertEquals(3 * 100L, totalCount);
  DataSetManager<MyKeyValueTableDefinition.KeyValueTable> mydatasetManager = getDataset("mydataset");
  Assert.assertEquals(100L, Long.valueOf(mydatasetManager.get().get("title:title")).longValue());
  // Also test the deprecated version of getDataset(); this can be removed when the method is removed
  mydatasetManager = getDataset("mydataset");
  Assert.assertEquals(100L, Long.valueOf(mydatasetManager.get().get("title:title")).longValue());
}
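callServiceGet is a private helper of TestFrameworkTestRun that is not part of this excerpt; the examples only show its call sites. A plausible sketch of such a helper, using Guava's CharStreams to read the response body; the actual implementation in the test class may differ:

  // Hedged sketch of a callServiceGet-style helper: GET <serviceURL>/<path> and return the body as a UTF-8 string.
  private String callServiceGet(URL serviceURL, String path) throws IOException {
    HttpURLConnection connection = (HttpURLConnection) new URL(serviceURL, path).openConnection();
    try {
      Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
      try (Reader reader = new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8)) {
        return CharStreams.toString(reader);
      }
    } finally {
      connection.disconnect();
    }
  }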
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata: class PurchaseAppTest, method test.
@Test
public void test() throws Exception {
  // Deploy the PurchaseApp application
  ApplicationManager appManager = deployApplication(PurchaseApp.class);
  // Start the PurchaseFlow
  FlowManager flowManager = appManager.getFlowManager("PurchaseFlow").start();
  // Send stream events to the "purchaseStream" stream
  StreamManager streamManager = getStreamManager("purchaseStream");
  streamManager.send("bob bought 3 apples for $30");
  streamManager.send("joe bought 1 apple for $100");
  streamManager.send("joe bought 10 pineapples for $20");
  streamManager.send("cat bought 3 bottles for $12");
  streamManager.send("cat bought 2 pops for $14");
  try {
    // Wait for the last flowlet to process 5 events, or at most 15 seconds
    RuntimeMetrics metrics = flowManager.getFlowletMetrics("collector");
    metrics.waitForProcessed(5, 15, TimeUnit.SECONDS);
  } finally {
    flowManager.stop();
  }
  ServiceManager userProfileServiceManager = getUserProfileServiceManager(appManager);
  // Add the customer's profile information
  URL userProfileUrl = new URL(userProfileServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                               UserProfileServiceHandler.USER_ENDPOINT);
  HttpURLConnection userProfileConnection = (HttpURLConnection) userProfileUrl.openConnection();
  String userProfileJson = "{'id' : 'joe', 'firstName': 'joe', 'lastName':'bernard', 'categories': ['fruits']}";
  try {
    userProfileConnection.setDoOutput(true);
    userProfileConnection.setRequestMethod("POST");
    userProfileConnection.getOutputStream().write(userProfileJson.getBytes(Charsets.UTF_8));
    Assert.assertEquals(HttpURLConnection.HTTP_OK, userProfileConnection.getResponseCode());
  } finally {
    userProfileConnection.disconnect();
  }
  // Test the service that retrieves the customer's profile information
  userProfileUrl = new URL(userProfileServiceManager.getServiceURL(15, TimeUnit.SECONDS),
                           UserProfileServiceHandler.USER_ENDPOINT + "/joe");
  userProfileConnection = (HttpURLConnection) userProfileUrl.openConnection();
  Assert.assertEquals(HttpURLConnection.HTTP_OK, userProfileConnection.getResponseCode());
  String customerJson;
  try {
    customerJson = new String(ByteStreams.toByteArray(userProfileConnection.getInputStream()), Charsets.UTF_8);
  } finally {
    userProfileConnection.disconnect();
  }
  UserProfile profileFromService = GSON.fromJson(customerJson, UserProfile.class);
  Assert.assertEquals(profileFromService.getFirstName(), "joe");
  Assert.assertEquals(profileFromService.getLastName(), "bernard");
  // Run the PurchaseHistoryBuilder MapReduce job, which processes the purchase data
  MapReduceManager mapReduceManager = appManager
    .getMapReduceManager(PurchaseHistoryBuilder.class.getSimpleName()).start();
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
  // Start the PurchaseHistoryService
  ServiceManager purchaseHistoryServiceManager = appManager
    .getServiceManager(PurchaseHistoryService.SERVICE_NAME).start();
  // Wait for the service to start up
  purchaseHistoryServiceManager.waitForStatus(true);
  // Test the service that retrieves a customer's purchase history
  URL url = new URL(purchaseHistoryServiceManager.getServiceURL(15, TimeUnit.SECONDS), "history/joe");
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
  String historyJson;
  try {
    historyJson = new String(ByteStreams.toByteArray(conn.getInputStream()), Charsets.UTF_8);
  } finally {
    conn.disconnect();
  }
  PurchaseHistory history = GSON.fromJson(historyJson, PurchaseHistory.class);
  Assert.assertEquals("joe", history.getCustomer());
  Assert.assertEquals(2, history.getPurchases().size());
  UserProfile profileFromPurchaseHistory = history.getUserProfile();
  Assert.assertEquals(profileFromPurchaseHistory.getFirstName(), "joe");
  Assert.assertEquals(profileFromPurchaseHistory.getLastName(), "bernard");
}
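getUserProfileServiceManager is a helper in PurchaseAppTest that is not included in this excerpt. A plausible sketch under the assumption that UserProfileServiceHandler exposes a SERVICE_NAME constant (that constant is hypothetical here; the real helper may look the service up differently):

  // Hedged sketch: start the user-profile service and wait until it reports running.
  // UserProfileServiceHandler.SERVICE_NAME is an assumed constant.
  private ServiceManager getUserProfileServiceManager(ApplicationManager appManager) throws InterruptedException {
    ServiceManager serviceManager = appManager.getServiceManager(UserProfileServiceHandler.SERVICE_NAME).start();
    serviceManager.waitForStatus(true, 15, 1);
    return serviceManager;
  }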
Use of co.cask.cdap.test.MapReduceManager in project cdap by caskdata: class FileSetWordCountTest, method testWordCountOnFileSet.
@Test
public void testWordCountOnFileSet() throws Exception {
  // deploy the application
  ApplicationManager applicationManager = deployApplication(FileSetExample.class);
  final String line1 = "a b a";
  final String line2 = "b a b";
  // discover the file set service
  ServiceManager serviceManager = applicationManager.getServiceManager("FileSetService").start();
  serviceManager.waitForStatus(true);
  URL serviceURL = serviceManager.getServiceURL();
  // write a file to the file set using the service
  HttpURLConnection connection = (HttpURLConnection) new URL(serviceURL, "lines?path=nn.1").openConnection();
  try {
    connection.setDoOutput(true);
    connection.setRequestMethod("PUT");
    connection.getOutputStream().write(line1.getBytes(Charsets.UTF_8));
    Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
  } finally {
    connection.disconnect();
  }
  // run word count over that file only
  Map<String, String> runtimeArguments = Maps.newHashMap();
  Map<String, String> inputArgs = Maps.newHashMap();
  FileSetArguments.setInputPaths(inputArgs, "nn.1");
  Map<String, String> outputArgs = Maps.newHashMap();
  FileSetArguments.setOutputPath(outputArgs, "out.1");
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "lines", inputArgs));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "counts", outputArgs));
  MapReduceManager mapReduceManager = applicationManager.getMapReduceManager("WordCount").start(runtimeArguments);
  mapReduceManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // retrieve the counts through the service and verify
  Map<String, Integer> counts = Maps.newHashMap();
  connection = (HttpURLConnection) new URL(serviceURL, "counts?path=out.1/part-r-00000").openConnection();
  try {
    connection.setRequestMethod("GET");
    Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
    readCounts(connection.getInputStream(), counts);
  } finally {
    connection.disconnect();
  }
  // "a b a" should yield "a":2, "b":1
  Assert.assertEquals(2, counts.size());
  Assert.assertEquals(new Integer(2), counts.get("a"));
  Assert.assertEquals(new Integer(1), counts.get("b"));
  // write a file to the file set using the dataset directly
  DataSetManager<FileSet> linesManager = getDataset("lines");
  OutputStream output = linesManager.get().getLocation("nn.2").getOutputStream();
  try {
    output.write(line2.getBytes(Charsets.UTF_8));
  } finally {
    output.close();
  }
  // run word count over both files
  FileSetArguments.setInputPath(inputArgs, "nn.1");
  FileSetArguments.addInputPath(inputArgs, "nn.2");
  FileSetArguments.setOutputPath(outputArgs, "out.2");
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "lines", inputArgs));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, "counts", outputArgs));
  mapReduceManager = applicationManager.getMapReduceManager("WordCount").start(runtimeArguments);
  mapReduceManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
  // retrieve the counts through the dataset API and verify
  DataSetManager<FileSet> countsManager = getDataset("counts");
  counts.clear();
  Location resultLocation = countsManager.get().getLocation("out.2");
  Assert.assertTrue(resultLocation.isDirectory());
  List<String> parts = new LinkedList<>();
  for (Location child : resultLocation.list()) {
    if (child.getName().startsWith("part-")) {
      // only read part files, not checksums or done files
      parts.add(child.getName());
      readCounts(child.getInputStream(), counts);
    }
  }
  // "a b a" and "b a b" should yield "a":3, "b":3
  Assert.assertEquals(2, counts.size());
  Assert.assertEquals(new Integer(3), counts.get("a"));
  Assert.assertEquals(new Integer(3), counts.get("b"));
  // retrieve the counts through the service
  counts.clear();
  for (String part : parts) {
    connection = (HttpURLConnection) new URL(serviceURL, "counts?path=out.2/" + part).openConnection();
    try {
      connection.setRequestMethod("GET");
      Assert.assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
      readCounts(connection.getInputStream(), counts);
    } finally {
      connection.disconnect();
    }
  }
  // "a b a" and "b a b" should yield "a":3, "b":3
  Assert.assertEquals(2, counts.size());
  Assert.assertEquals(new Integer(3), counts.get("a"));
  Assert.assertEquals(new Integer(3), counts.get("b"));
  serviceManager.stop();
}
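readCounts is a helper of FileSetWordCountTest that is not shown in this excerpt. The MapReduce output it parses is standard Hadoop text output, one "word<TAB>count" pair per line; a plausible sketch under that assumption (the real helper may differ):

  // Hedged sketch: parse "<word>\t<count>" lines from a part file into the counts map, accumulating across files.
  private void readCounts(InputStream inputStream, Map<String, Integer> counts) throws IOException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\t");
        if (fields.length != 2) {
          continue;  // skip malformed lines
        }
        Integer previous = counts.get(fields[0]);
        int count = Integer.parseInt(fields[1]);
        counts.put(fields[0], previous == null ? count : previous + count);
      }
    }
  }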