Search in sources:

Example 11 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From class SparkTest, method testPySpark.

@Test
public void testPySpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    // Write something to the stream
    StreamManager streamManager = getStreamManager("SparkStream");
    for (int i = 0; i < 100; i++) {
        streamManager.send("Event " + i);
    }
    File outputDir = new File(TMP_FOLDER.newFolder(), "output");
    SparkManager sparkManager = appManager.getSparkManager(PythonSpark.class.getSimpleName())
        .start(ImmutableMap.of("input.stream", "SparkStream",
                               "output.path", outputDir.getAbsolutePath()));
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    // Verify the result
    File resultFile = Iterables.find(DirUtils.listFiles(outputDir), new Predicate<File>() {

        @Override
        public boolean apply(File input) {
            return !input.getName().endsWith(".crc") && !input.getName().startsWith("_SUCCESS");
        }
    });
    List<String> lines = Files.readAllLines(resultFile.toPath(), StandardCharsets.UTF_8);
    Assert.assertTrue(!lines.isEmpty());
    // Expect only even-numbered events in the output
    int count = 0;
    for (String line : lines) {
        line = line.trim();
        if (!line.isEmpty()) {
            Assert.assertEquals("Event " + count, line);
            count += 2;
        }
    }
    Assert.assertEquals(100, count);
    final Map<String, String> tags = ImmutableMap.of(
        Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
        Constants.Metrics.Tag.APP, TestSparkApp.class.getSimpleName(),
        Constants.Metrics.Tag.SPARK, PythonSpark.class.getSimpleName());
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            return getMetricsManager().getTotalMetric(tags, "user.body");
        }
    }, 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) IOException(java.io.IOException) StreamManager(co.cask.cdap.test.StreamManager) PythonSpark(co.cask.cdap.spark.app.PythonSpark) File(java.io.File) Test(org.junit.Test)
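The anonymous Guava Predicate that locates the Spark part-file is pre-Java-8 style; the same lookup can be written with java.nio.file streams. A minimal, self-contained sketch (an illustration, not part of the CDAP test):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

public class OutputFiles {

    // Finds the first Spark output file in a directory, skipping .crc
    // checksums and the _SUCCESS marker, mirroring the Predicate above.
    static Path findResultFile(Path outputDir) throws IOException {
        try (Stream<Path> files = Files.list(outputDir)) {
            return files
                .filter(p -> !p.getFileName().toString().endsWith(".crc"))
                .filter(p -> !p.getFileName().toString().startsWith("_SUCCESS"))
                .findFirst()
                .orElseThrow(() -> new IOException("No result file in " + outputDir));
        }
    }
}

Called as findResultFile(outputDir.toPath()), it returns the same file the Iterables.find lookup does.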

Example 12 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From class SparkTest, method testSparkHttpService.

@Test
public void testSparkHttpService() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    SparkManager sparkManager = applicationManager.getSparkManager(SparkServiceProgram.class.getSimpleName()).start();
    URL url = sparkManager.getServiceURL(5, TimeUnit.MINUTES);
    Assert.assertNotNull(url);
    // GET request to sum n numbers.
    URL sumURL = url.toURI().resolve("sum?n=" + Joiner.on("&n=").join(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).toURL();
    HttpURLConnection urlConn = (HttpURLConnection) sumURL.openConnection();
    Assert.assertEquals(HttpURLConnection.HTTP_OK, urlConn.getResponseCode());
    try (InputStream is = urlConn.getInputStream()) {
        Assert.assertEquals(55, Integer.parseInt(new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8)));
    }
    URL wordcountURL = url.toURI().resolve("wordcount").toURL();
    urlConn = (HttpURLConnection) wordcountURL.openConnection();
    // POST lines of sentences
    urlConn.setDoOutput(true);
    urlConn.setChunkedStreamingMode(10);
    List<String> messages = new ArrayList<>();
    try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8"))) {
        for (int i = 0; i < 10; i++) {
            writer.printf("Message number %d\n", i);
            messages.add("Message number " + i);
        }
    }
    Assert.assertEquals(200, urlConn.getResponseCode());
    try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
        Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() {
        }.getType());
        // Do a wordcount locally to get the expected result
        Map<String, Integer> expected = messages.stream()
            .flatMap((Function<String, Stream<String>>) s -> Arrays.stream(s.split("\\s+")))
            .map(s -> Maps.immutableEntry(s, 1))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (v1, v2) -> v1 + v2));
        Assert.assertEquals(expected, result);
    }
    sparkManager.stop();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) SparkServiceProgram(co.cask.cdap.spark.app.SparkServiceProgram) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) Joiner(com.google.common.base.Joiner) Maps(com.google.common.collect.Maps) ByteStreams(com.google.common.io.ByteStreams) Gson(com.google.gson.Gson) TypeToken(com.google.gson.reflect.TypeToken) InputStream(java.io.InputStream) InputStreamReader(java.io.InputStreamReader) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) Reader(java.io.Reader) HttpURLConnection(java.net.HttpURLConnection) URL(java.net.URL) StandardCharsets(java.nio.charset.StandardCharsets) ArrayList(java.util.ArrayList) Arrays(java.util.Arrays) List(java.util.List) Map(java.util.Map) TimeUnit(java.util.concurrent.TimeUnit) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Stream(java.util.stream.Stream) Assert(org.junit.Assert) Test(org.junit.Test)
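The expected map on the client side merges counts for repeated words through Collectors.toMap's merge function (v1, v2) -> v1 + v2. A standalone demonstration of that merge behavior, runnable with the JDK alone:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class WordCountDemo {
    public static void main(String[] args) {
        List<String> messages = Arrays.asList("Message number 0", "Message number 1");
        // Split each line on whitespace, map every token to a count of 1,
        // then merge counts for repeated tokens with Long::sum.
        Map<String, Long> counts = messages.stream()
            .flatMap(s -> Arrays.stream(s.split("\\s+")))
            .collect(Collectors.toMap(w -> w, w -> 1L, Long::sum));
        // Prints counts such as {Message=2, number=2, 0=1, 1=1} (map order may vary)
        System.out.println(counts);
    }
}

Without the merge function, toMap would throw IllegalStateException on the first duplicate key, which is why the test supplies (v1, v2) -> v1 + v2.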

Example 13 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From class SparkTest, method testTransaction.

@Test
public void testTransaction() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    StreamManager streamManager = getStreamManager("SparkStream");
    // Write some sentences to the stream
    streamManager.send("red fox");
    streamManager.send("brown fox");
    streamManager.send("grey fox");
    streamManager.send("brown bear");
    streamManager.send("black bear");
    // Run the spark program
    SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of(
        "source.stream", "SparkStream",
        "keyvalue.table", "KeyValueTable",
        "result.all.dataset", "SparkResult",
        "result.threshold", "2",
        "result.threshold.dataset", "SparkThresholdResult"));
    // Verify result from dataset before the Spark program terminates
    final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
    final KeyValueTable resultTable = resultManager.get();
    // Expect the threshold result dataset (threshold >= 2) to contain [brown, fox, bear]
    Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {

        @Override
        public Set<String> call() throws Exception {
            // Flush to start a new transaction for each read
            resultManager.flush();
            LOG.info("Reading from threshold result");
            try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
                return ImmutableSet.copyOf(Iterators.transform(itor, input -> {
                    String word = Bytes.toString(input.getKey());
                    LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
                    return word;
                }));
            }
        }
    }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    sparkManager.stop();
    sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) SparkManager(co.cask.cdap.test.SparkManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileSet(co.cask.cdap.api.dataset.lib.FileSet) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) IOException(java.io.IOException) TransactionSpark(co.cask.cdap.spark.app.TransactionSpark) Test(org.junit.Test)
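Tasks.waitFor polls the Callable until it returns the expected value, giving up after the timeout (here 3 minutes, checking every second). A simplified sketch of that polling contract, assuming only the JDK; the real co.cask.cdap.common.utils.Tasks implementation may differ:

import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class Polling {

    // Repeatedly invokes the callable until it returns the expected value,
    // sleeping between attempts, and throws TimeoutException otherwise.
    static <T> void waitFor(T expected, Callable<T> callable,
                            long timeout, TimeUnit timeoutUnit,
                            long delay, TimeUnit delayUnit) throws Exception {
        long deadline = System.nanoTime() + timeoutUnit.toNanos(timeout);
        while (System.nanoTime() < deadline) {
            if (expected.equals(callable.call())) {
                return;
            }
            delayUnit.sleep(delay);
        }
        throw new TimeoutException("Value " + expected + " not reached in time");
    }
}

Polling like this lets the test tolerate the lag between the Spark program writing to the dataset and the result becoming visible in a new transaction.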

Example 14 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From class SparkTest, method testDynamicSpark.

@Test
public void testDynamicSpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    // Populate data into the stream
    StreamManager streamManager = getStreamManager("SparkStream");
    for (int i = 0; i < 10; i++) {
        streamManager.send("Line " + (i + 1));
    }
    SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of("input", "SparkStream", "output", "ResultTable", "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath()));
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // Validate the result written to dataset
    KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
    // There should be ten occurrences of the token "Line"
    Assert.assertEquals(10, Bytes.toInt(resultTable.read("Line")));
    // Each number should appear once
    for (int i = 0; i < 10; i++) {
        Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(i + 1))));
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ScalaDynamicSpark(co.cask.cdap.spark.app.ScalaDynamicSpark) Test(org.junit.Test)
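The assertions encode the expected token counts: across the ten input lines "Line 1" through "Line 10", the token "Line" occurs ten times and each number exactly once. A plain-Java check of that expectation (an illustration, not the Spark job itself):

import java.util.HashMap;
import java.util.Map;

public class ExpectedCounts {
    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<>();
        for (int i = 0; i < 10; i++) {
            // Tokenize each input line the way a whitespace split would.
            for (String token : ("Line " + (i + 1)).split("\\s+")) {
                counts.merge(token, 1, Integer::sum);
            }
        }
        System.out.println(counts.get("Line"));  // 10
        System.out.println(counts.get("7"));     // 1
    }
}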

Example 15 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From class SparkMetricsIntegrationTestRun, method testSparkMetrics.

@Test
public void testSparkMetrics() throws Exception {
    ApplicationManager applicationManager = deployApplication(TestSparkMetricsIntegrationApp.class);
    SparkManager sparkManager = applicationManager.getSparkManager(TestSparkMetricsIntegrationApp.APP_SPARK_NAME).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
    List<RunRecord> history = sparkManager.getHistory(ProgramRunStatus.COMPLETED);
    Assert.assertEquals(1, history.size());
    // Wait for the metrics to get updated
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            return getSparkMetric(TestSparkMetricsIntegrationApp.APP_NAME, TestSparkMetricsIntegrationApp.APP_SPARK_NAME, "system.driver.BlockManager.memory.remainingMem_MB") > 0;
        }
    }, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    Tasks.waitFor(2L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            return getSparkMetric(TestSparkMetricsIntegrationApp.APP_NAME, TestSparkMetricsIntegrationApp.APP_SPARK_NAME, "user.more.than.30");
        }
    }, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
Also used : RunRecord(co.cask.cdap.proto.RunRecord) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) Test(org.junit.Test)
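The getSparkMetric helper is not shown in this snippet. Judging from the getMetricsManager().getTotalMetric(tags, ...) call in Example 11, it plausibly builds a tags map for the app and Spark program and queries the metric by name. A hedged sketch under that assumption (a method that would live inside the test class; the actual helper may differ):

// Sketch only: reuses the getTotalMetric API seen in Example 11.
// The method name and shape here are assumptions, not CDAP's source.
private long getSparkMetric(String appName, String sparkName, String metricName) throws Exception {
    Map<String, String> tags = ImmutableMap.of(
        Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
        Constants.Metrics.Tag.APP, appName,
        Constants.Metrics.Tag.SPARK, sparkName);
    return getMetricsManager().getTotalMetric(tags, metricName);
}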

Aggregations

SparkManager (co.cask.cdap.test.SparkManager) 58
ApplicationManager (co.cask.cdap.test.ApplicationManager) 52
Test (org.junit.Test) 48
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable) 29
StreamManager (co.cask.cdap.test.StreamManager) 21
HashMap (java.util.HashMap) 14
ImmutableSet (com.google.common.collect.ImmutableSet) 13
Set (java.util.Set) 13
FileSet (co.cask.cdap.api.dataset.lib.FileSet) 12
TimeoutException (java.util.concurrent.TimeoutException) 12
Schema (co.cask.cdap.api.data.schema.Schema) 10
Table (co.cask.cdap.api.dataset.table.Table) 10
ServiceManager (co.cask.cdap.test.ServiceManager) 10
IOException (java.io.IOException) 10
URL (java.net.URL) 10
Location (org.apache.twill.filesystem.Location) 10
TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException) 9
ApplicationId (co.cask.cdap.proto.id.ApplicationId) 9
HashSet (java.util.HashSet) 9
AppRequest (co.cask.cdap.proto.artifact.AppRequest) 8