Search in sources :

Example 46 with StreamManager

use of co.cask.cdap.test.StreamManager in project cdap by caskdata.

the class SparkKMeansAppTest method test.

@Test
public void test() throws Exception {
    // Deploy the Application
    ApplicationManager appManager = deployApplication(SparkKMeansApp.class);
    // Start the Flow
    FlowManager flowManager = appManager.getFlowManager("PointsFlow").start();
    // Send a few points to the stream
    StreamManager streamManager = getStreamManager("pointsStream");
    streamManager.send("10.6 519.2 110.3");
    streamManager.send("10.6 518.1 110.1");
    streamManager.send("10.6 519.6 109.9");
    streamManager.send("10.6 517.9 108.9");
    streamManager.send("10.7 518 109.2");
    // Wait for the events to be processed, or at most 5 seconds
    RuntimeMetrics metrics = flowManager.getFlowletMetrics("reader");
    metrics.waitForProcessed(3, 5, TimeUnit.SECONDS);
    // Start a Spark Program
    SparkManager sparkManager = appManager.getSparkManager("SparkKMeansProgram").start();
    sparkManager.waitForFinish(60, TimeUnit.SECONDS);
    flowManager.stop();
    // Start CentersService
    ServiceManager serviceManager = appManager.getServiceManager(SparkKMeansApp.CentersService.SERVICE_NAME).start();
    // Wait service startup
    serviceManager.waitForStatus(true);
    // Request data and verify it
    String response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/1"));
    String[] coordinates = response.split(",");
    Assert.assertTrue(coordinates.length == 3);
    for (String coordinate : coordinates) {
        double value = Double.parseDouble(coordinate);
        Assert.assertTrue(value > 0);
    }
    // Request data by incorrect index and verify response
    URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/10");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
        Assert.assertEquals(HttpURLConnection.HTTP_NO_CONTENT, conn.getResponseCode());
    } finally {
        conn.disconnect();
    }
}
Also used : FlowManager(co.cask.cdap.test.FlowManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) HttpURLConnection(java.net.HttpURLConnection) StreamManager(co.cask.cdap.test.StreamManager) RuntimeMetrics(co.cask.cdap.api.metrics.RuntimeMetrics) ServiceManager(co.cask.cdap.test.ServiceManager) URL(java.net.URL) Test(org.junit.Test)

Example 47 with StreamManager

use of co.cask.cdap.test.StreamManager in project cdap by caskdata.

the class DataPipelineTest method testSinglePhaseWithSparkSink.

private void testSinglePhaseWithSparkSink() throws Exception {
    /*
     * source1 ---|
     *            |--> sparksink
     * source2 ---|
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("source1", MockSource.getPlugin("messages1", SpamMessage.SCHEMA))).addStage(new ETLStage("source2", MockSource.getPlugin("messages2", SpamMessage.SCHEMA))).addStage(new ETLStage("customsink", new ETLPlugin(NaiveBayesTrainer.PLUGIN_NAME, SparkSink.PLUGIN_TYPE, ImmutableMap.of("fileSetName", "modelFileSet", "path", "output", "fieldToClassify", SpamMessage.TEXT_FIELD, "predictionField", SpamMessage.SPAM_PREDICTION_FIELD), null))).addConnection("source1", "customsink").addConnection("source2", "customsink").build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("SparkSinkApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // set up five spam messages and five non-spam messages to be used for classification
    List<StructuredRecord> messagesToWrite = new ArrayList<>();
    messagesToWrite.add(new SpamMessage("buy our clothes", 1.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("sell your used books to us", 1.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("earn money for free", 1.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("this is definitely not spam", 1.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("you won the lottery", 1.0).toStructuredRecord());
    // write records to source1
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("messages1"));
    MockSource.writeInput(inputManager, messagesToWrite);
    messagesToWrite.clear();
    messagesToWrite.add(new SpamMessage("how was your day", 0.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("what are you up to", 0.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("this is a genuine message", 0.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("this is an even more genuine message", 0.0).toStructuredRecord());
    messagesToWrite.add(new SpamMessage("could you send me the report", 0.0).toStructuredRecord());
    // write records to source2
    inputManager = getDataset(NamespaceId.DEFAULT.dataset("messages2"));
    MockSource.writeInput(inputManager, messagesToWrite);
    // ingest in some messages to be classified
    StreamManager textsToClassify = getStreamManager(NaiveBayesTrainer.TEXTS_TO_CLASSIFY);
    textsToClassify.send("how are you doing today");
    textsToClassify.send("free money money");
    textsToClassify.send("what are you doing today");
    textsToClassify.send("genuine report");
    // manually trigger the pipeline
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> classifiedTexts = getDataset(NaiveBayesTrainer.CLASSIFIED_TEXTS);
    Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("how are you doing today")), 0.01d);
    // only 'free money money' should be predicated as spam
    Assert.assertEquals(1.0d, Bytes.toDouble(classifiedTexts.get().read("free money money")), 0.01d);
    Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("what are you doing today")), 0.01d);
    Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("genuine report")), 0.01d);
    validateMetric(5, appId, "source1.records.out");
    validateMetric(5, appId, "source2.records.out");
    validateMetric(10, appId, "customsink.records.in");
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) SpamMessage(co.cask.cdap.datapipeline.mock.SpamMessage) WorkflowManager(co.cask.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ApplicationId(co.cask.cdap.proto.id.ApplicationId)

Aggregations

StreamManager (co.cask.cdap.test.StreamManager)47 ApplicationManager (co.cask.cdap.test.ApplicationManager)41 Test (org.junit.Test)39 SparkManager (co.cask.cdap.test.SparkManager)19 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)17 FlowManager (co.cask.cdap.test.FlowManager)17 ServiceManager (co.cask.cdap.test.ServiceManager)15 RuntimeMetrics (co.cask.cdap.api.metrics.RuntimeMetrics)12 URL (java.net.URL)12 MapReduceManager (co.cask.cdap.test.MapReduceManager)9 ImmutableSet (com.google.common.collect.ImmutableSet)7 Set (java.util.Set)7 HttpURLConnection (java.net.HttpURLConnection)6 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)5 EntityId (co.cask.cdap.proto.id.EntityId)5 StreamId (co.cask.cdap.proto.id.StreamId)5 Gson (com.google.gson.Gson)5 EnumSet (java.util.EnumSet)5 HashSet (java.util.HashSet)5 Category (org.junit.experimental.categories.Category)5