use of co.cask.cdap.test.StreamManager in project cdap by caskdata.
the class SparkKMeansAppTest method test.
/**
 * End-to-end test of {@code SparkKMeansApp}: deploys the application, feeds points through
 * the "PointsFlow" flow, runs the "SparkKMeansProgram" Spark program and then queries the
 * centers service over HTTP.
 *
 * @throws Exception if any step fails or one of the timed waits expires
 */
@Test
public void test() throws Exception {
  // Deploy the Application
  ApplicationManager appManager = deployApplication(SparkKMeansApp.class);
  // Start the Flow
  FlowManager flowManager = appManager.getFlowManager("PointsFlow").start();

  // Send a few points to the stream. They are kept in one array so that the wait below
  // always matches the number of events that were actually sent.
  String[] points = {
    "10.6 519.2 110.3",
    "10.6 518.1 110.1",
    "10.6 519.6 109.9",
    "10.6 517.9 108.9",
    "10.7 518 109.2"
  };
  StreamManager streamManager = getStreamManager("pointsStream");
  for (String point : points) {
    streamManager.send(point);
  }

  // Wait for ALL sent events to be processed by the "reader" flowlet, or at most 5 seconds.
  // Waiting for only part of the input would let the Spark program start while events
  // are still in flight.
  RuntimeMetrics metrics = flowManager.getFlowletMetrics("reader");
  metrics.waitForProcessed(points.length, 5, TimeUnit.SECONDS);

  // Start a Spark Program
  SparkManager sparkManager = appManager.getSparkManager("SparkKMeansProgram").start();
  sparkManager.waitForFinish(60, TimeUnit.SECONDS);
  flowManager.stop();

  // Start CentersService
  ServiceManager serviceManager = appManager.getServiceManager(SparkKMeansApp.CentersService.SERVICE_NAME).start();
  // Wait service startup
  serviceManager.waitForStatus(true);

  // Resolve the service base URL once; both requests below are relative to it.
  URL serviceUrl = serviceManager.getServiceURL(15, TimeUnit.SECONDS);

  // Request data and verify it: the response is expected to hold three comma-separated,
  // strictly positive coordinates.
  String response = requestService(new URL(serviceUrl, "centers/1"));
  String[] coordinates = response.split(",");
  Assert.assertEquals(3, coordinates.length);
  for (String coordinate : coordinates) {
    double value = Double.parseDouble(coordinate);
    Assert.assertTrue("Expected a positive coordinate but got " + coordinate, value > 0);
  }

  // Request data by incorrect index and verify response
  URL url = new URL(serviceUrl, "centers/10");
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    Assert.assertEquals(HttpURLConnection.HTTP_NO_CONTENT, conn.getResponseCode());
  } finally {
    conn.disconnect();
  }
}
use of co.cask.cdap.test.StreamManager in project cdap by caskdata.
the class DataPipelineTest method testSinglePhaseWithSparkSink.
/**
 * Runs a batch pipeline in which two mock sources feed a single {@code NaiveBayesTrainer}
 * Spark sink, then checks the predictions stored for a handful of streamed texts and the
 * record metrics of each stage.
 *
 * @throws Exception if deployment, the workflow run or any verification step fails
 */
private void testSinglePhaseWithSparkSink() throws Exception {
  /*
   * source1 ---|
   *            |--> sparksink
   * source2 ---|
   */
  ETLStage firstSource = new ETLStage("source1", MockSource.getPlugin("messages1", SpamMessage.SCHEMA));
  ETLStage secondSource = new ETLStage("source2", MockSource.getPlugin("messages2", SpamMessage.SCHEMA));
  ETLStage trainerSink = new ETLStage(
    "customsink",
    new ETLPlugin(NaiveBayesTrainer.PLUGIN_NAME,
                  SparkSink.PLUGIN_TYPE,
                  ImmutableMap.of("fileSetName", "modelFileSet",
                                  "path", "output",
                                  "fieldToClassify", SpamMessage.TEXT_FIELD,
                                  "predictionField", SpamMessage.SPAM_PREDICTION_FIELD),
                  null));
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(firstSource)
    .addStage(secondSource)
    .addStage(trainerSink)
    .addConnection("source1", "customsink")
    .addConnection("source2", "customsink")
    .build();

  ApplicationId appId = NamespaceId.DEFAULT.app("SparkSinkApp");
  ApplicationManager appManager = deployApplication(appId, new AppRequest<>(APP_ARTIFACT, etlConfig));

  // Five messages labelled as spam (1.0) go to source1 ...
  String[] spamTexts = {
    "buy our clothes",
    "sell your used books to us",
    "earn money for free",
    "this is definitely not spam",
    "you won the lottery"
  };
  List<StructuredRecord> spamRecords = new ArrayList<>();
  for (String text : spamTexts) {
    spamRecords.add(new SpamMessage(text, 1.0).toStructuredRecord());
  }
  DataSetManager<Table> firstInput = getDataset(NamespaceId.DEFAULT.dataset("messages1"));
  MockSource.writeInput(firstInput, spamRecords);

  // ... and five messages labelled as non-spam (0.0) go to source2.
  String[] genuineTexts = {
    "how was your day",
    "what are you up to",
    "this is a genuine message",
    "this is an even more genuine message",
    "could you send me the report"
  };
  List<StructuredRecord> genuineRecords = new ArrayList<>();
  for (String text : genuineTexts) {
    genuineRecords.add(new SpamMessage(text, 0.0).toStructuredRecord());
  }
  DataSetManager<Table> secondInput = getDataset(NamespaceId.DEFAULT.dataset("messages2"));
  MockSource.writeInput(secondInput, genuineRecords);

  // Texts to be classified, paired by index with the prediction expected for each one.
  // Only 'free money money' should be predicted as spam.
  String[] unclassified = {
    "how are you doing today",
    "free money money",
    "what are you doing today",
    "genuine report"
  };
  double[] expectedPredictions = {0.0d, 1.0d, 0.0d, 0.0d};

  StreamManager textsToClassify = getStreamManager(NaiveBayesTrainer.TEXTS_TO_CLASSIFY);
  for (String text : unclassified) {
    textsToClassify.send(text);
  }

  // Trigger the pipeline by hand and give it up to five minutes to complete.
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

  // Compare the stored prediction of every text with the expected one.
  DataSetManager<KeyValueTable> classifiedTexts = getDataset(NaiveBayesTrainer.CLASSIFIED_TEXTS);
  for (int i = 0; i < unclassified.length; i++) {
    double prediction = Bytes.toDouble(classifiedTexts.get().read(unclassified[i]));
    Assert.assertEquals(expectedPredictions[i], prediction, 0.01d);
  }

  // Each source emitted its five records and the sink received all ten.
  validateMetric(5, appId, "source1.records.out");
  validateMetric(5, appId, "source2.records.out");
  validateMetric(10, appId, "customsink.records.in");
}
Aggregations