Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class Spark2Test, method testScalaSparkCrossNSStream.
@Test
public void testScalaSparkCrossNSStream() throws Exception {
  // create a namespace for the stream and create the stream in it
  NamespaceMeta crossNSStreamMeta = new NamespaceMeta.Builder().setName("streamSpaceForSpark").build();
  getNamespaceAdmin().create(crossNSStreamMeta);
  StreamManager streamManager = getStreamManager(crossNSStreamMeta.getNamespaceId().stream("testStream"));

  // create a namespace for the dataset and add the dataset instance in it
  NamespaceMeta crossNSDatasetMeta = new NamespaceMeta.Builder().setName("crossNSDataset").build();
  getNamespaceAdmin().create(crossNSDatasetMeta);
  addDatasetInstance(crossNSDatasetMeta.getNamespaceId().dataset("count"), "keyValueTable");

  // write something to the stream
  streamManager.createStream();
  for (int i = 0; i < 50; i++) {
    streamManager.send(String.valueOf(i));
  }

  // deploy the spark app in another namespace (default)
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
  Map<String, String> args = ImmutableMap.of(
    ScalaCrossNSProgram.STREAM_NAMESPACE(), crossNSStreamMeta.getNamespaceId().getNamespace(),
    ScalaCrossNSProgram.DATASET_NAMESPACE(), crossNSDatasetMeta.getNamespaceId().getNamespace(),
    ScalaCrossNSProgram.DATASET_NAME(), "count");
  SparkManager sparkManager =
    applicationManager.getSparkManager(ScalaCrossNSProgram.class.getSimpleName()).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);

  // get the dataset from the other namespace where we expect it to exist and compare the data
  DataSetManager<KeyValueTable> countManager = getDataset(crossNSDatasetMeta.getNamespaceId().dataset("count"));
  KeyValueTable results = countManager.get();
  for (int i = 0; i < 50; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertArrayEquals(key, results.read(key));
  }
}
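Note that the two namespaces created above persist after the method returns; a test suite would normally remove them during cleanup. A minimal sketch of such a step, using the same NamespaceAdmin shown above (the placement of this cleanup is an assumption, not part of the original test):

// hypothetical cleanup: drop the cross-namespace fixtures so that
// later tests start from a clean slate
getNamespaceAdmin().delete(crossNSStreamMeta.getNamespaceId());
getNamespaceAdmin().delete(crossNSDatasetMeta.getNamespaceId());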
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class Spark2Test, method testScalaSparkCrossNSDataset.
@Test
public void testScalaSparkCrossNSDataset() throws Exception {
  // Deploy the app and create an input dataset in namespace datasetSpaceForSpark
  NamespaceMeta inputDSNSMeta = new NamespaceMeta.Builder().setName("datasetSpaceForSpark").build();
  getNamespaceAdmin().create(inputDSNSMeta);
  deploy(inputDSNSMeta.getNamespaceId(), SparkAppUsingObjectStore.class);
  DataSetManager<ObjectStore<String>> keysManager = getDataset(inputDSNSMeta.getNamespaceId().dataset("keys"));
  prepareInputData(keysManager);

  // run the program from the default namespace, reading the input dataset across namespaces
  Map<String, String> args = ImmutableMap.of(
    ScalaCharCountProgram.INPUT_DATASET_NAMESPACE(), inputDSNSMeta.getNamespaceId().getNamespace(),
    ScalaCharCountProgram.INPUT_DATASET_NAME(), "keys");
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
  SparkManager sparkManager =
    applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> countManager = getDataset("count");
  checkOutputData(countManager);
}
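prepareInputData and checkOutputData are private helpers of Spark2Test and are not shown in this listing. A plausible sketch, assuming the Spark program stores each input string's character count under the string's own key (the sample strings and exact assertions are assumptions):

private void prepareInputData(DataSetManager<ObjectStore<String>> manager) {
  // write two sample strings, each keyed by its own bytes
  ObjectStore<String> keys = manager.get();
  keys.write(Bytes.toBytes("multithreading"), "multithreading");
  keys.write(Bytes.toBytes("classpath"), "classpath");
  manager.flush();
}

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
  // verify the program stored each string's length under the same key
  KeyValueTable count = manager.get();
  byte[] val = count.read(Bytes.toBytes("multithreading"));
  Assert.assertNotNull(val);
  Assert.assertEquals("multithreading".length(), Bytes.toInt(val));
}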
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class Spark2Test, method testSparkWithObjectStore.
@Test
public void testSparkWithObjectStore() throws Exception {
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
  DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
  prepareInputData(keysManager);

  SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> countManager = getDataset("count");
  checkOutputData(countManager);

  // validate that the "totals" table emitted metrics:
  // one read + one write in beforeSubmit(), plus an increment (= read + write) in main -> 4
  Tasks.waitFor(4L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(
        0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE,
        "system." + Constants.Metrics.Name.Dataset.OP_COUNT,
        AggregationFunction.SUM,
        ImmutableMap.of(
          Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(),
          Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(),
          Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(),
          Constants.Metrics.Tag.DATASET, "totals"),
        Collections.<String>emptyList()));
      if (metrics.isEmpty()) {
        return 0L;
      }
      Assert.assertEquals(1, metrics.size());
      MetricTimeSeries ts = metrics.iterator().next();
      Assert.assertEquals(1, ts.getTimeValues().size());
      return ts.getTimeValues().get(0).getValue();
    }
  }, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}
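The Tasks.waitFor block polls because dataset operation metrics are emitted asynchronously, so a single assertion right after waitForRun could observe a partial count. The same pattern works for any eventually-consistent check; a minimal sketch, assuming we merely wait for some expected output key to appear (the key "0" is illustrative):

// illustrative only: poll every 50 ms, give up after 10 seconds
Tasks.waitFor(true, new Callable<Boolean>() {
  @Override
  public Boolean call() throws Exception {
    // re-fetch the dataset on every poll to see the latest writes
    DataSetManager<KeyValueTable> manager = getDataset("count");
    return manager.get().read(Bytes.toBytes("0")) != null;
  }
}, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);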
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class Spark2Test, method testSparkWithLocalFiles.
private void testSparkWithLocalFiles(Class<? extends Application> appClass, String sparkProgram,
                                     String prefix) throws Exception {
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, appClass);
  URI localFile = createLocalPropertiesFile(prefix);
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram)
    .start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
  KeyValueTable kvTable = kvTableManager.get();
  Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
  List<byte[]> deleteKeys = new ArrayList<>();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
    for (int i = 0; i < 3; i++) {
      KeyValue<byte[], byte[]> next = scan.next();
      Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
      deleteKeys.add(next.getKey());
    }
    Assert.assertFalse(scan.hasNext());
  }

  // Cleanup after run
  kvTableManager.flush();
  for (byte[] key : deleteKeys) {
    kvTable.delete(key);
  }
  kvTableManager.flush();
}
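createLocalPropertiesFile is another private helper that is not part of this listing. Given that the run is expected to produce exactly a=1, b=2, c=3, a plausible sketch (the temp-file layout and naming are assumptions):

private URI createLocalPropertiesFile(String filePrefix) throws IOException {
  // write the three key/value pairs the assertions above expect back
  File file = File.createTempFile(filePrefix, ".properties");
  file.deleteOnExit();
  try (Writer writer = Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
    writer.write("a=1\nb=2\nc=3\n");
  }
  return file.toURI();
}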
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SpamClassifierTest, method test.
@Test
public void test() throws Exception {
  // Deploy the SpamClassifier application
  ApplicationManager appManager = deployApplication(SpamClassifier.class);
  ingestTrainingData();
  publishKafkaMessages();

  // start the Spark Streaming program
  SparkManager sparkManager = appManager.getSparkManager(SpamClassifierProgram.class.getSimpleName());
  Map<String, String> runtimeArgs = new HashMap<>();
  runtimeArgs.put("kafka.brokers", "127.0.0.1:" + kafkaPort);
  runtimeArgs.put("kafka.topics", KAFKA_TOPIC);
  sparkManager.start(runtimeArgs);

  // start the service and wait for it to come up
  final ServiceManager serviceManager = appManager.getServiceManager(SpamClassifier.SERVICE_HANDLER).start();
  serviceManager.waitForStatus(true);

  // wait for the Spark Streaming program to write to the dataset
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return testClassification(serviceManager, "1", SpamClassifier.SpamClassifierServiceHandler.SPAM)
        && testClassification(serviceManager, "2", SpamClassifier.SpamClassifierServiceHandler.HAM);
    }
  }, 60, TimeUnit.SECONDS);

  // stop the Spark program
  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 1, TimeUnit.MINUTES);
  appManager.stopAll();
}
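testClassification is likewise a private helper of the test class. A plausible sketch, assuming the service exposes a GET endpoint that returns the predicted label for a message id (the "predict/{id}" path and the response handling are assumptions):

private boolean testClassification(ServiceManager serviceManager, String messageId,
                                   String expected) throws Exception {
  // hypothetical endpoint: GET <service-url>/predict/<messageId>
  URL url = new URL(serviceManager.getServiceURL(), "predict/" + messageId);
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
      return false;
    }
    String body = new String(ByteStreams.toByteArray(conn.getInputStream()), Charsets.UTF_8);
    return expected.equals(body.trim());
  } finally {
    conn.disconnect();
  }
}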