Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testPySpark.
@Test
public void testPySpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  // Write something to the stream
  StreamManager streamManager = getStreamManager("SparkStream");
  for (int i = 0; i < 100; i++) {
    streamManager.send("Event " + i);
  }

  File outputDir = new File(TMP_FOLDER.newFolder(), "output");
  SparkManager sparkManager = appManager.getSparkManager(PythonSpark.class.getSimpleName())
    .start(ImmutableMap.of("input.stream", "SparkStream",
                           "output.path", outputDir.getAbsolutePath()));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  // Verify the result
  File resultFile = Iterables.find(DirUtils.listFiles(outputDir), new Predicate<File>() {
    @Override
    public boolean apply(File input) {
      return !input.getName().endsWith(".crc") && !input.getName().startsWith("_SUCCESS");
    }
  });

  List<String> lines = Files.readAllLines(resultFile.toPath(), StandardCharsets.UTF_8);
  Assert.assertFalse(lines.isEmpty());

  // Expect only even numbers
  int count = 0;
  for (String line : lines) {
    line = line.trim();
    if (!line.isEmpty()) {
      Assert.assertEquals("Event " + count, line);
      count += 2;
    }
  }
  Assert.assertEquals(100, count);

  final Map<String, String> tags = ImmutableMap.of(
    Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
    Constants.Metrics.Tag.APP, TestSparkApp.class.getSimpleName(),
    Constants.Metrics.Tag.SPARK, PythonSpark.class.getSimpleName());
  Tasks.waitFor(100L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      return getMetricsManager().getTotalMetric(tags, "user.body");
    }
  }, 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
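Note: on Java 8+, the anonymous Callable passed to Tasks.waitFor collapses to a lambda, since Callable is a functional interface. A minimal sketch, assuming the same tags map and getMetricsManager() helper from the test above:

  // Equivalent lambda form of the metrics poll above
  Tasks.waitFor(100L,
                () -> getMetricsManager().getTotalMetric(tags, "user.body"),
                5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);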
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testSparkHttpService.
@Test
public void testSparkHttpService() throws Exception {
  ApplicationManager applicationManager = deploy(TestSparkApp.class);
  SparkManager sparkManager =
    applicationManager.getSparkManager(SparkServiceProgram.class.getSimpleName()).start();

  URL url = sparkManager.getServiceURL(5, TimeUnit.MINUTES);
  Assert.assertNotNull(url);

  // GET request to sum n numbers.
  URL sumURL = url.toURI().resolve("sum?n=" + Joiner.on("&n=").join(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).toURL();
  HttpURLConnection urlConn = (HttpURLConnection) sumURL.openConnection();
  Assert.assertEquals(HttpURLConnection.HTTP_OK, urlConn.getResponseCode());
  try (InputStream is = urlConn.getInputStream()) {
    Assert.assertEquals(55,
                        Integer.parseInt(new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8)));
  }

  URL wordcountURL = url.toURI().resolve("wordcount").toURL();
  urlConn = (HttpURLConnection) wordcountURL.openConnection();

  // POST lines of sentences
  urlConn.setDoOutput(true);
  urlConn.setChunkedStreamingMode(10);
  List<String> messages = new ArrayList<>();
  try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8"))) {
    for (int i = 0; i < 10; i++) {
      writer.printf("Message number %d\n", i);
      messages.add("Message number " + i);
    }
  }
  Assert.assertEquals(200, urlConn.getResponseCode());

  try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
    Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() { }.getType());
    // Do a wordcount locally to get the expected result
    Map<String, Integer> expected = messages.stream()
      .flatMap((Function<String, Stream<String>>) s -> Arrays.stream(s.split("\\s+")))
      .map(s -> Maps.immutableEntry(s, 1))
      .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (v1, v2) -> v1 + v2));
    Assert.assertEquals(expected, result);
  }
  sparkManager.stop();
}
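The local word count is a fairly dense stream pipeline. An equivalent sketch without streams, using a plain loop and Map.merge (Java 8+), produces the same expected map:

  // Same word count as the stream pipeline above, written as a loop
  Map<String, Integer> expected = new HashMap<>();
  for (String message : messages) {
    for (String word : message.split("\\s+")) {
      expected.merge(word, 1, Integer::sum);  // increment, starting at 1 on first sight
    }
  }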
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testTransaction.
@Test
public void testTransaction() throws Exception {
  ApplicationManager applicationManager = deploy(TestSparkApp.class);
  StreamManager streamManager = getStreamManager("SparkStream");

  // Write some sentences to the stream
  streamManager.send("red fox");
  streamManager.send("brown fox");
  streamManager.send("grey fox");
  streamManager.send("brown bear");
  streamManager.send("black bear");

  // Run the Spark program
  SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of(
    "source.stream", "SparkStream",
    "keyvalue.table", "KeyValueTable",
    "result.all.dataset", "SparkResult",
    "result.threshold", "2",
    "result.threshold.dataset", "SparkThresholdResult"));

  // Verify the result from the dataset before the Spark program terminates
  final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
  final KeyValueTable resultTable = resultManager.get();

  // Expect the threshold result dataset, with threshold >= 2, to contain [brown, fox, bear]
  Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {
    @Override
    public Set<String> call() throws Exception {
      // Flush to start a new transaction for each read
      resultManager.flush();
      LOG.info("Reading from threshold result");
      try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
        return ImmutableSet.copyOf(Iterators.transform(itor, input -> {
          String word = Bytes.toString(input.getKey());
          LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
          return word;
        }));
      }
    }
  }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);

  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
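The test only asserts which words cross the threshold. A hypothetical variant could assert the counts as well by scanning into a map; the expected values below are derived from the sentences sent above (fox appears 3 times, brown and bear twice each), not from the original test:

  // Sketch: collect word -> count from the threshold dataset and compare exactly
  Map<String, Integer> counts = new HashMap<>();
  try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
    while (itor.hasNext()) {
      KeyValue<byte[], byte[]> kv = itor.next();
      counts.put(Bytes.toString(kv.getKey()), Bytes.toInt(kv.getValue()));
    }
  }
  Assert.assertEquals(ImmutableMap.of("brown", 2, "fox", 3, "bear", 2), counts);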
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testDynamicSpark.
@Test
public void testDynamicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  // Populate data into the stream
  StreamManager streamManager = getStreamManager("SparkStream");
  for (int i = 0; i < 10; i++) {
    streamManager.send("Line " + (i + 1));
  }

  SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of(
    "input", "SparkStream",
    "output", "ResultTable",
    "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath()));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

  // Validate the result written to the dataset
  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();

  // There should be ten occurrences of the word "Line"
  Assert.assertEquals(10, Bytes.toInt(resultTable.read("Line")));

  // Each number should appear once
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(i + 1))));
  }
}
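The explicit type witness in this.<KeyValueTable>getDataset("ResultTable") is only needed because the result is chained straight into .get(); splitting it into two statements lets the compiler infer the type, matching the style of the testTransaction example above:

  // Equivalent two-step form without the explicit type argument
  DataSetManager<KeyValueTable> resultManager = getDataset("ResultTable");
  KeyValueTable resultTable = resultManager.get();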
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkMetricsIntegrationTestRun, method testSparkMetrics.
@Test
public void testSparkMetrics() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkMetricsIntegrationApp.class);
  SparkManager sparkManager =
    applicationManager.getSparkManager(TestSparkMetricsIntegrationApp.APP_SPARK_NAME).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);

  List<RunRecord> history = sparkManager.getHistory(ProgramRunStatus.COMPLETED);
  Assert.assertEquals(1, history.size());

  // Wait for the metrics to get updated
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return getSparkMetric(TestSparkMetricsIntegrationApp.APP_NAME,
                            TestSparkMetricsIntegrationApp.APP_SPARK_NAME,
                            "system.driver.BlockManager.memory.remainingMem_MB") > 0;
    }
  }, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);

  Tasks.waitFor(2L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      return getSparkMetric(TestSparkMetricsIntegrationApp.APP_NAME,
                            TestSparkMetricsIntegrationApp.APP_SPARK_NAME,
                            "user.more.than.30");
    }
  }, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
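The getSparkMetric helper is not shown on this page. A plausible sketch of its shape, assuming it is built on the same getMetricsManager().getTotalMetric(...) call and tag map used in the testPySpark example above (the method body here is an assumption, not the actual cdap implementation):

  // Hypothetical helper: query a total metric scoped to one Spark program
  private long getSparkMetric(String appName, String sparkName, String metricName) throws Exception {
    Map<String, String> tags = ImmutableMap.of(
      Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
      Constants.Metrics.Tag.APP, appName,
      Constants.Metrics.Tag.SPARK, sparkName);
    return getMetricsManager().getTotalMetric(tags, metricName);
  }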