Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkServiceIntegrationTestRun, method testSparkWithService.
@Test
public void testSparkWithService() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
  startService(applicationManager);
  SparkManager sparkManager = applicationManager
    .getSparkManager(TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName())
    .start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  KeyValueTable results = datasetManager.get();
  for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertEquals((i * i), Integer.parseInt(Bytes.toString(results.read(key))));
  }
}
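The startService(...) helper is defined elsewhere in SparkServiceIntegrationTestRun and is not part of this snippet. A minimal sketch of what such a helper might look like, assuming the app exposes a SERVICE_NAME constant for the service that the Spark program calls (both the constant name and the wait step are assumptions, not the project's actual code):

  // Hypothetical helper, not from the CDAP source: starts the companion service
  // and blocks until it reports a running status.
  private void startService(ApplicationManager applicationManager) throws Exception {
    ServiceManager serviceManager =
      applicationManager.getServiceManager(TestSparkServiceIntegrationApp.SERVICE_NAME).start();
    serviceManager.waitForStatus(true);
  }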
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkStreamIntegrationTestRun, method testSparkCrossNS.
@Test
public void testSparkCrossNS() throws Exception {
  // Test reading a stream cross-namespace, and reading and writing datasets cross-namespace:
  // TestSparkStreamIntegrationApp is deployed in the default namespace;
  // it reads a stream from streamNS and writes to a dataset in its own namespace (default).
  // TestSparkCrossNSDatasetApp is deployed in crossNSDatasetAppNS;
  // it reads from the dataset in default (created by TestSparkStreamIntegrationApp)
  // and writes to a dataset in outputDatasetNS.
  NamespaceMeta streamNSMeta = new NamespaceMeta.Builder().setName("streamNS").build();
  NamespaceMeta crossNSDatasetAppNS = new NamespaceMeta.Builder().setName("crossNSDatasetAppNS").build();
  NamespaceMeta outputDatasetNS = new NamespaceMeta.Builder().setName("outputDatasetNS").build();
  getNamespaceAdmin().create(streamNSMeta);
  getNamespaceAdmin().create(crossNSDatasetAppNS);
  getNamespaceAdmin().create(outputDatasetNS);
  addDatasetInstance(outputDatasetNS.getNamespaceId().dataset("finalDataset"), "keyValueTable");
  StreamManager streamManager = getStreamManager(streamNSMeta.getNamespaceId().stream("testStream"));
  streamManager.createStream();
  for (int i = 0; i < 50; i++) {
    streamManager.send(String.valueOf(i));
  }
  // deploy TestSparkStreamIntegrationApp in the default namespace
  ApplicationManager spark1 = deployApplication(TestSparkStreamIntegrationApp.class);
  Map<String, String> args = ImmutableMap.of(
    TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAMESPACE, streamNSMeta.getNamespaceId().getNamespace(),
    TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAME, "testStream");
  SparkManager sparkManager = spark1.getSparkManager("SparkStreamProgram").start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  // verify the results written in the default namespace by spark1
  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  verifyDatasetResult(datasetManager);
  // deploy the cross-namespace dataset app in crossNSDatasetAppNS
  ApplicationManager spark2 = deployApplication(crossNSDatasetAppNS.getNamespaceId(), TestSparkCrossNSDatasetApp.class);
  args = ImmutableMap.of(
    TestSparkCrossNSDatasetApp.INPUT_DATASET_NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
    TestSparkCrossNSDatasetApp.INPUT_DATASET_NAME, "result",
    TestSparkCrossNSDatasetApp.OUTPUT_DATASET_NAMESPACE, outputDatasetNS.getNamespaceId().getNamespace(),
    TestSparkCrossNSDatasetApp.OUTPUT_DATASET_NAME, "finalDataset");
  sparkManager = spark2.getSparkManager("SparkCrossNSDatasetProgram").start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  // verify the results written to outputDatasetNS by spark2
  datasetManager = getDataset(outputDatasetNS.getNamespaceId().dataset("finalDataset"));
  verifyDatasetResult(datasetManager);
}
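verifyDatasetResult(...) is another helper from the test class that is not shown here. A minimal sketch under the assumption that each of the 50 stream events ("0" through "49") is copied into the KeyValueTable with the event body as both key and value; the real Spark programs may store a different key/value shape:

  // Hypothetical verification helper; the key/value layout is an assumption.
  private void verifyDatasetResult(DataSetManager<KeyValueTable> datasetManager) {
    KeyValueTable results = datasetManager.get();
    for (int i = 0; i < 50; i++) {
      byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
      Assert.assertEquals(String.valueOf(i), Bytes.toString(results.read(key)));
    }
  }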
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkKMeansAppTest, method test.
@Test
public void test() throws Exception {
  // Deploy the Application
  ApplicationManager appManager = deployApplication(SparkKMeansApp.class);
  // Start the Flow
  FlowManager flowManager = appManager.getFlowManager("PointsFlow").start();
  // Send a few points to the stream
  StreamManager streamManager = getStreamManager("pointsStream");
  streamManager.send("10.6 519.2 110.3");
  streamManager.send("10.6 518.1 110.1");
  streamManager.send("10.6 519.6 109.9");
  streamManager.send("10.6 517.9 108.9");
  streamManager.send("10.7 518 109.2");
  // Wait for at least three events to be processed, or at most 5 seconds
  RuntimeMetrics metrics = flowManager.getFlowletMetrics("reader");
  metrics.waitForProcessed(3, 5, TimeUnit.SECONDS);
  // Start the Spark program
  SparkManager sparkManager = appManager.getSparkManager("SparkKMeansProgram").start();
  sparkManager.waitForFinish(60, TimeUnit.SECONDS);
  flowManager.stop();
  // Start CentersService
  ServiceManager serviceManager = appManager.getServiceManager(SparkKMeansApp.CentersService.SERVICE_NAME).start();
  // Wait for service startup
  serviceManager.waitForStatus(true);
  // Request data and verify it
  String response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/1"));
  String[] coordinates = response.split(",");
  Assert.assertTrue(coordinates.length == 3);
  for (String coordinate : coordinates) {
    double value = Double.parseDouble(coordinate);
    Assert.assertTrue(value > 0);
  }
  // Request data by an incorrect index and verify the response
  URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/10");
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    Assert.assertEquals(HttpURLConnection.HTTP_NO_CONTENT, conn.getResponseCode());
  } finally {
    conn.disconnect();
  }
}
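requestService(...) is a private helper of SparkKMeansAppTest that is not included above. A minimal sketch of a GET request that returns the response body as a string, using Guava's ByteStreams; the status-code assertion is an assumption about the helper's behavior:

  // Hypothetical helper: issue a GET against the service URL and read the body.
  // Requires java.io.InputStream and com.google.common.io.ByteStreams imports.
  private String requestService(URL url) throws IOException {
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
      Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
      try (InputStream input = conn.getInputStream()) {
        return new String(ByteStreams.toByteArray(input), Charsets.UTF_8);
      }
    } finally {
      conn.disconnect();
    }
  }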
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class DataStreamsTest, method testAlertPublisher.
@Test
public void testAlertPublisher() throws Exception {
  String sinkName = "alertSink";
  final String topic = "alertTopic";
  Schema schema = Schema.recordOf("x", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.LONG))));
  StructuredRecord record1 = StructuredRecord.builder(schema).set("id", 1L).build();
  StructuredRecord record2 = StructuredRecord.builder(schema).set("id", 2L).build();
  StructuredRecord alertRecord = StructuredRecord.builder(schema).build();
  /*
   * source --> nullAlert --> sink
   *               |
   *               |--> TMS publisher
   */
  DataStreamsConfig config = DataStreamsConfig.builder()
    .setBatchInterval("5s")
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, ImmutableList.of(record1, record2, alertRecord))))
    .addStage(new ETLStage("nullAlert", NullAlertTransform.getPlugin("id")))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
    .addStage(new ETLStage("tms", TMSAlertPublisher.getPlugin(topic, NamespaceId.DEFAULT.getNamespace())))
    .addConnection("source", "nullAlert")
    .addConnection("nullAlert", "sink")
    .addConnection("nullAlert", "tms")
    .build();
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("AlertTest");
  ApplicationManager appManager = deployApplication(appId, appRequest);
  SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
  sparkManager.start();
  sparkManager.waitForStatus(true, 10, 1);
  final Set<StructuredRecord> expectedRecords = ImmutableSet.of(record1, record2);
  final Set<Alert> expectedMessages = ImmutableSet.of(new Alert("nullAlert", new HashMap<String, String>()));
  final DataSetManager<Table> sinkTable = getDataset(sinkName);
  Tasks.waitFor(true, new Callable<Boolean>() {

    @Override
    public Boolean call() throws Exception {
      // get alerts from TMS
      try {
        getMessagingAdmin(NamespaceId.DEFAULT.getNamespace()).getTopicProperties(topic);
      } catch (TopicNotFoundException e) {
        return false;
      }
      MessageFetcher messageFetcher = getMessagingContext().getMessageFetcher();
      Set<Alert> actualMessages = new HashSet<>();
      try (CloseableIterator<Message> iter =
             messageFetcher.fetch(NamespaceId.DEFAULT.getNamespace(), topic, 5, 0)) {
        while (iter.hasNext()) {
          Message message = iter.next();
          Alert alert = GSON.fromJson(message.getPayloadAsString(), Alert.class);
          actualMessages.add(alert);
        }
      }
      // get records from the sink
      sinkTable.flush();
      Set<StructuredRecord> outputRecords = new HashSet<>();
      outputRecords.addAll(MockSink.readOutput(sinkTable));
      return expectedRecords.equals(outputRecords) && expectedMessages.equals(actualMessages);
    }
  }, 4, TimeUnit.MINUTES);
  sparkManager.stop();
  sparkManager.waitForStatus(false, 10, 1);
  validateMetric(appId, "source.records.out", 3);
  validateMetric(appId, "nullAlert.records.in", 3);
  validateMetric(appId, "nullAlert.records.out", 2);
  validateMetric(appId, "nullAlert.records.alert", 1);
  validateMetric(appId, "sink.records.in", 2);
  validateMetric(appId, "tms.records.in", 1);
}
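validateMetric(...) is a helper in DataStreamsTest whose body is not shown. A rough sketch of how pipeline metrics could be checked, assuming the test base exposes getMetricsManager() and that stage metrics are emitted under the "user." prefix with namespace/app/spark tags; the tag constants and MetricsManager method names here are assumptions, not confirmed from the source:

  // Hypothetical metric check; tag constants and MetricsManager calls are assumed, not verified.
  private void validateMetric(ApplicationId appId, String metric, long expected) throws Exception {
    Map<String, String> tags = ImmutableMap.of(
      Constants.Metrics.Tag.NAMESPACE, appId.getNamespace(),
      Constants.Metrics.Tag.APP, appId.getApplication(),
      Constants.Metrics.Tag.SPARK, DataStreamsSparkLauncher.NAME);
    getMetricsManager().waitForTotalMetricCount(tags, "user." + metric, expected, 10, TimeUnit.SECONDS);
    Assert.assertEquals(expected, getMetricsManager().getTotalMetric(tags, "user." + metric));
  }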
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class DataStreamsTest, method testErrorTransform.
@Test
public void testErrorTransform() throws Exception {
  String sink1TableName = "errTestOut1";
  String sink2TableName = "errTestOut2";
  Schema inputSchema = Schema.recordOf("user", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  List<StructuredRecord> input = ImmutableList.of(
    StructuredRecord.builder(inputSchema).set("name", "Leo").build(),
    StructuredRecord.builder(inputSchema).set("name", "Ralph").build(),
    StructuredRecord.builder(inputSchema).set("name", "Don").build(),
    StructuredRecord.builder(inputSchema).set("name", "Mike").build(),
    StructuredRecord.builder(inputSchema).set("name", "April").build());
  /*
   * source --> filter1 --> filter2 --> agg1 --> agg2
   *               |           |          |        |
   *               |-----------|----------|--------|--> flatten errors --> sink1
   *                                               |
   *                                               |--> filter errors --> sink2
   *
   * arrows coming out the right represent output records
   * arrows coming out the bottom represent error records
   * this will test multiple stages from multiple phases emitting errors to the same stage
   * as well as errors from one stage going to multiple stages
   */
  DataStreamsConfig config = DataStreamsConfig.builder()
    .setBatchInterval("5s")
    .addStage(new ETLStage("source", MockSource.getPlugin(inputSchema, input)))
    .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", "Leo")))
    .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", "Ralph")))
    .addStage(new ETLStage("agg1", GroupFilterAggregator.getPlugin("name", "Don")))
    .addStage(new ETLStage("agg2", GroupFilterAggregator.getPlugin("name", "Mike")))
    .addStage(new ETLStage("errorflatten", FlattenErrorTransform.getPlugin()))
    .addStage(new ETLStage("errorfilter", FilterErrorTransform.getPlugin(3)))
    .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1TableName)))
    .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2TableName)))
    .addConnection("source", "filter1")
    .addConnection("filter1", "filter2")
    .addConnection("filter2", "agg1")
    .addConnection("agg1", "agg2")
    .addConnection("filter1", "errorflatten")
    .addConnection("filter1", "errorfilter")
    .addConnection("filter2", "errorflatten")
    .addConnection("filter2", "errorfilter")
    .addConnection("agg1", "errorflatten")
    .addConnection("agg1", "errorfilter")
    .addConnection("agg2", "errorflatten")
    .addConnection("agg2", "errorfilter")
    .addConnection("errorflatten", "sink1")
    .addConnection("errorfilter", "sink2")
    .build();
  AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("ErrTransformTest");
  ApplicationManager appManager = deployApplication(appId, appRequest);
  SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
  sparkManager.start();
  sparkManager.waitForStatus(true, 10, 1);
  Schema flattenSchema = Schema.recordOf("erroruser",
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("errMsg", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("errCode", Schema.nullableOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("errStage", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  final Set<StructuredRecord> expected = ImmutableSet.of(
    StructuredRecord.builder(flattenSchema)
      .set("name", "Leo").set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter1").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Ralph").set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter2").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Don").set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg1").build(),
    StructuredRecord.builder(flattenSchema)
      .set("name", "Mike").set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg2").build());
  final DataSetManager<Table> sink1Table = getDataset(sink1TableName);
  Tasks.waitFor(true, new Callable<Boolean>() {

    @Override
    public Boolean call() throws Exception {
      sink1Table.flush();
      Set<StructuredRecord> outputRecords = new HashSet<>();
      outputRecords.addAll(MockSink.readOutput(sink1Table));
      return expected.equals(outputRecords);
    }
  }, 4, TimeUnit.MINUTES);
  final Set<StructuredRecord> expected2 = ImmutableSet.of(
    StructuredRecord.builder(inputSchema).set("name", "Leo").build(),
    StructuredRecord.builder(inputSchema).set("name", "Ralph").build());
  final DataSetManager<Table> sink2Table = getDataset(sink2TableName);
  Tasks.waitFor(true, new Callable<Boolean>() {

    @Override
    public Boolean call() throws Exception {
      sink2Table.flush();
      Set<StructuredRecord> outputRecords = new HashSet<>();
      outputRecords.addAll(MockSink.readOutput(sink2Table));
      return expected2.equals(outputRecords);
    }
  }, 4, TimeUnit.MINUTES);
}