Example 26 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From class SparkTest, method testPySpark.

@Test
public void testPySpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    // Write something to the stream
    StreamManager streamManager = getStreamManager("SparkStream");
    for (int i = 0; i < 100; i++) {
        streamManager.send("Event " + i);
    }
    File outputDir = new File(TMP_FOLDER.newFolder(), "output");
    SparkManager sparkManager = appManager.getSparkManager(PythonSpark.class.getSimpleName()).start(ImmutableMap.of("input.stream", "SparkStream", "output.path", outputDir.getAbsolutePath()));
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    // Verify the result
    File resultFile = Iterables.find(DirUtils.listFiles(outputDir), new Predicate<File>() {

        @Override
        public boolean apply(File input) {
            return !input.getName().endsWith(".crc") && !input.getName().startsWith("_SUCCESS");
        }
    });
    List<String> lines = Files.readAllLines(resultFile.toPath(), StandardCharsets.UTF_8);
    Assert.assertTrue(!lines.isEmpty());
    // Expect only even-numbered events in the output
    int count = 0;
    for (String line : lines) {
        line = line.trim();
        if (!line.isEmpty()) {
            Assert.assertEquals("Event " + count, line);
            count += 2;
        }
    }
    Assert.assertEquals(100, count);
    final Map<String, String> tags = ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(), Constants.Metrics.Tag.APP, TestSparkApp.class.getSimpleName(), Constants.Metrics.Tag.SPARK, PythonSpark.class.getSimpleName());
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            return getMetricsManager().getTotalMetric(tags, "user.body");
        }
    }, 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) IOException(java.io.IOException) StreamManager(co.cask.cdap.test.StreamManager) PythonSpark(co.cask.cdap.spark.app.PythonSpark) File(java.io.File) Test(org.junit.Test)
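
On Java 8+, the Guava Iterables.find/Predicate combination above can be replaced with plain java.nio streams. A minimal sketch under that assumption (the class and helper names here are illustrative, not part of the test):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

public class OutputFileCheck {
    // Pick the first real result file, skipping Hadoop's checksum
    // and success-marker files, as the test above does with Guava.
    static File findResultFile(File outputDir) throws IOException {
        try (Stream<Path> files = Files.list(outputDir.toPath())) {
            return files.map(Path::toFile)
                .filter(f -> !f.getName().endsWith(".crc") && !f.getName().startsWith("_SUCCESS"))
                .findFirst()
                .orElseThrow(() -> new IOException("No result file in " + outputDir));
        }
    }
}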

Example 27 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From class SparkTest, method testSparkHttpService.

@Test
public void testSparkHttpService() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    SparkManager sparkManager = applicationManager.getSparkManager(SparkServiceProgram.class.getSimpleName()).start();
    URL url = sparkManager.getServiceURL(5, TimeUnit.MINUTES);
    Assert.assertNotNull(url);
    // GET request to sum n numbers.
    URL sumURL = url.toURI().resolve("sum?n=" + Joiner.on("&n=").join(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).toURL();
    HttpURLConnection urlConn = (HttpURLConnection) sumURL.openConnection();
    Assert.assertEquals(HttpURLConnection.HTTP_OK, urlConn.getResponseCode());
    try (InputStream is = urlConn.getInputStream()) {
        Assert.assertEquals(55, Integer.parseInt(new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8)));
    }
    URL wordcountURL = url.toURI().resolve("wordcount").toURL();
    urlConn = (HttpURLConnection) wordcountURL.openConnection();
    // POST lines of sentences
    urlConn.setDoOutput(true);
    urlConn.setChunkedStreamingMode(10);
    List<String> messages = new ArrayList<>();
    try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8"))) {
        for (int i = 0; i < 10; i++) {
            writer.printf("Message number %d\n", i);
            messages.add("Message number " + i);
        }
    }
    Assert.assertEquals(200, urlConn.getResponseCode());
    try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
        Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() {
        }.getType());
        // Do a wordcount locally to get the expected result
        Map<String, Integer> expected = messages.stream().flatMap((Function<String, Stream<String>>) s -> Arrays.stream(s.split("\\s+"))).map(s -> Maps.immutableEntry(s, 1)).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (v1, v2) -> v1 + v2));
        Assert.assertEquals(expected, result);
    }
    sparkManager.stop();
}
Also used : HttpURLConnection(java.net.HttpURLConnection) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) Arrays(java.util.Arrays) StreamProperties(co.cask.cdap.proto.StreamProperties) TypeToken(com.google.gson.reflect.TypeToken) ScalaSparkLogParser(co.cask.cdap.spark.app.ScalaSparkLogParser) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) URL(java.net.URL) TransactionSpark(co.cask.cdap.spark.app.TransactionSpark) LoggerFactory(org.slf4j.LoggerFactory) SparkManager(co.cask.cdap.test.SparkManager) FileSetArguments(co.cask.cdap.api.dataset.lib.FileSetArguments) StreamSQLSpark(co.cask.cdap.spark.app.StreamSQLSpark) StringLengthUDT(co.cask.cdap.spark.app.plugin.StringLengthUDT) Tasks(co.cask.cdap.common.utils.Tasks) CharStreams(com.google.common.io.CharStreams) Gson(com.google.gson.Gson) Map(java.util.Map) Locations(co.cask.cdap.common.io.Locations) DirUtils(co.cask.cdap.common.utils.DirUtils) SparkLogParser(co.cask.cdap.spark.app.SparkLogParser) ClassRule(org.junit.ClassRule) TestFrameworkTestBase(co.cask.cdap.test.base.TestFrameworkTestBase) PrintWriter(java.io.PrintWriter) Application(co.cask.cdap.api.app.Application) ImmutableSet(com.google.common.collect.ImmutableSet) IdentityHashMap(java.util.IdentityHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) Person(co.cask.cdap.spark.app.Person) ApplicationManager(co.cask.cdap.test.ApplicationManager) Set(java.util.Set) RuntimeArguments(co.cask.cdap.api.common.RuntimeArguments) Scope(co.cask.cdap.api.common.Scope) Reader(java.io.Reader) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) Stream(java.util.stream.Stream) StringLengthFunc(co.cask.cdap.spark.app.plugin.StringLengthFunc) Predicate(com.google.common.base.Predicate) DataSetManager(co.cask.cdap.test.DataSetManager) ByteStreams(com.google.common.io.ByteStreams) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ScalaClassicSparkProgram(co.cask.cdap.spark.app.ScalaClassicSparkProgram) CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) Joiner(com.google.common.base.Joiner) NamespaceId(co.cask.cdap.proto.id.NamespaceId) Iterables(com.google.common.collect.Iterables) SparkServiceProgram(co.cask.cdap.spark.app.SparkServiceProgram) BeforeClass(org.junit.BeforeClass) Location(org.apache.twill.filesystem.Location) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) FileSet(co.cask.cdap.api.dataset.lib.FileSet) StreamManager(co.cask.cdap.test.StreamManager) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) ScheduleId(co.cask.cdap.proto.id.ScheduleId) Schema(co.cask.cdap.api.data.schema.Schema) ProgramRunStatus(co.cask.cdap.proto.ProgramRunStatus) DatasetSQLSpark(co.cask.cdap.spark.app.DatasetSQLSpark) ScalaDynamicSpark(co.cask.cdap.spark.app.ScalaDynamicSpark) PythonSpark(co.cask.cdap.spark.app.PythonSpark) OutputStreamWriter(java.io.OutputStreamWriter) Constants(co.cask.cdap.common.conf.Constants) PrintStream(java.io.PrintStream) Charsets(com.google.common.base.Charsets) Logger(org.slf4j.Logger) PluggableFunc(co.cask.cdap.spark.app.plugin.PluggableFunc) Files(java.nio.file.Files) Bytes(co.cask.cdap.api.common.Bytes) Test(org.junit.Test) IOException(java.io.IOException) ScalaStreamFormatSpecSpark(co.cask.cdap.spark.app.ScalaStreamFormatSpecSpark) TestConfiguration(co.cask.cdap.test.TestConfiguration) Maps(com.google.common.collect.Maps) InputStreamReader(java.io.InputStreamReader) SparkAppUsingGetDataset(co.cask.cdap.spark.app.SparkAppUsingGetDataset) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) URLEncoder(java.net.URLEncoder) WorkflowManager(co.cask.cdap.test.WorkflowManager) ObjectMappedTable(co.cask.cdap.api.dataset.lib.ObjectMappedTable) ClassicSparkProgram(co.cask.cdap.spark.app.ClassicSparkProgram) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) Assert(org.junit.Assert) Collections(java.util.Collections) StreamFormatSpecSpark(co.cask.cdap.spark.app.StreamFormatSpecSpark) InputStream(java.io.InputStream)
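
The expected-map construction above resolves duplicate keys with an explicit merge function; the same local wordcount can also be written with Collectors.groupingBy. A minimal JDK-only sketch (note it yields Long counts, whereas the test compares Integer values):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class LocalWordCount {
    // Count word occurrences across a list of lines, mirroring the
    // expected-result computation in the test above.
    static Map<String, Long> wordCount(List<String> lines) {
        return lines.stream()
            .flatMap(s -> Arrays.stream(s.split("\\s+")))
            .collect(Collectors.groupingBy(w -> w, Collectors.counting()));
    }

    public static void main(String[] args) {
        // Prints {number=2, Message=2, 0=1, 1=1} (order may vary)
        System.out.println(wordCount(Arrays.asList("Message number 0", "Message number 1")));
    }
}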

Example 28 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From class SparkTest, method testTransaction.

@Test
public void testTransaction() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    StreamManager streamManager = getStreamManager("SparkStream");
    // Write some sentences to the stream
    streamManager.send("red fox");
    streamManager.send("brown fox");
    streamManager.send("grey fox");
    streamManager.send("brown bear");
    streamManager.send("black bear");
    // Run the spark program
    SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of("source.stream", "SparkStream", "keyvalue.table", "KeyValueTable", "result.all.dataset", "SparkResult", "result.threshold", "2", "result.threshold.dataset", "SparkThresholdResult"));
    // Verify result from dataset before the Spark program terminates
    final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
    final KeyValueTable resultTable = resultManager.get();
    // Expect the threshold result dataset (threshold >= 2) to contain [brown, fox, bear]
    Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {

        @Override
        public Set<String> call() throws Exception {
            // Flush to start a new transaction for each poll
            resultManager.flush();
            LOG.info("Reading from threshold result");
            try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
                return ImmutableSet.copyOf(Iterators.transform(itor, input -> {
                    String word = Bytes.toString(input.getKey());
                    LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
                    return word;
                }));
            }
        }
    }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    sparkManager.stop();
    sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) SparkManager(co.cask.cdap.test.SparkManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileSet(co.cask.cdap.api.dataset.lib.FileSet) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) IOException(java.io.IOException) TransactionSpark(co.cask.cdap.spark.app.TransactionSpark) Test(org.junit.Test)
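
Tasks.waitFor, as used here and in the PySpark test above, blocks until the Callable returns the expected value, re-polling at the given interval and failing once the timeout elapses. A dependency-free sketch of that polling contract (the class and method below are illustrative, not the actual co.cask.cdap.common.utils.Tasks implementation):

import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class WaitFor {
    // Poll until call() equals expected, checking every pollIntervalMs,
    // giving up after timeoutMs.
    static <T> void waitFor(T expected, Callable<T> call,
                            long timeoutMs, long pollIntervalMs) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (true) {
            T actual = call.call();
            if (Objects.equals(expected, actual)) {
                return;
            }
            if (System.currentTimeMillis() >= deadline) {
                throw new TimeoutException("Timed out waiting for " + expected
                        + "; last value was " + actual);
            }
            TimeUnit.MILLISECONDS.sleep(pollIntervalMs);
        }
    }
}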

Example 29 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From class SparkTest, method testDynamicSpark.

@Test
public void testDynamicSpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    // Populate data into the stream
    StreamManager streamManager = getStreamManager("SparkStream");
    for (int i = 0; i < 10; i++) {
        streamManager.send("Line " + (i + 1));
    }
    SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of("input", "SparkStream", "output", "ResultTable", "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath()));
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // Validate the result written to dataset
    KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
    // There should be ten occurrences of "Line"
    Assert.assertEquals(10, Bytes.toInt(resultTable.read("Line")));
    // Each number should appear once
    for (int i = 0; i < 10; i++) {
        Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(i + 1))));
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ScalaDynamicSpark(co.cask.cdap.spark.app.ScalaDynamicSpark) Test(org.junit.Test)
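
The assertions imply that ScalaDynamicSpark performs a whitespace-tokenized word count over the stream events; under that assumption, the expected table contents can be reproduced locally:

import java.util.HashMap;
import java.util.Map;

public class ExpectedDynamicCounts {
    public static void main(String[] args) {
        // Rebuild the expected word counts for inputs "Line 1" .. "Line 10":
        // "Line" appears ten times, each number exactly once.
        Map<String, Integer> expected = new HashMap<>();
        for (int i = 0; i < 10; i++) {
            for (String token : ("Line " + (i + 1)).split(" ")) {
                expected.merge(token, 1, Integer::sum);
            }
        }
        System.out.println(expected.get("Line")); // 10
        System.out.println(expected.get("7"));    // 1
    }
}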

Example 30 with ApplicationManager

Use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

From class FileUploadServiceTestRun, method testFileUploadService.

@Test
public void testFileUploadService() throws Exception {
    ApplicationManager appManager = deployApplication(FileUploadApp.class);
    // Start the service
    ServiceManager serviceManager = appManager.getServiceManager(FileUploadApp.SERVICE_NAME).start();
    serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    try {
        // Upload URL is "base/upload/pfs/[partition_value]", where the partition value is a long
        URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();
        // Upload with wrong MD5, should get 400.
        byte[] content = Strings.repeat("0123456789 ", 100).getBytes(Charsets.UTF_8);
        Assert.assertEquals(HttpURLConnection.HTTP_BAD_REQUEST, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, "123", 30));
        long beforeUploadTime = System.currentTimeMillis();
        // Upload with right MD5, should get 200
        Assert.assertEquals(HttpURLConnection.HTTP_OK, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, BaseEncoding.base64().encode(Hashing.md5().hashBytes(content).asBytes()), 20));
        // Inspect the partitioned file set and verify the content
        PartitionedFileSet pfs = (PartitionedFileSet) getDataset(FileUploadApp.PFS_NAME).get();
        PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
        Assert.assertNotNull(partition);
        // Verify a notification should have been published for the new partition
        List<Notification> notifications = getDataNotifications(beforeUploadTime);
        // Should have one message
        Assert.assertEquals(1, notifications.size());
        verifyDataNotification(notifications.get(0), NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME), Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));
        // There should be one file under the partition directory
        List<Location> locations = partition.getLocation().list();
        Assert.assertEquals(1, locations.size());
        Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));
        // Verify the tracking table of chunk sizes
        KeyValueTable trackingTable = (KeyValueTable) getDataset(FileUploadApp.KV_TABLE_NAME).get();
        // Sum up all chunk sizes as tracked by the tracking table.
        long sum = 0;
        int iterSize = 0;
        try (CloseableIterator<KeyValue<byte[], byte[]>> iter = trackingTable.scan(null, null)) {
            while (iter.hasNext()) {
                KeyValue<byte[], byte[]> kv = iter.next();
                sum += Bytes.toInt(kv.getKey()) * Bytes.toLong(kv.getValue());
                iterSize++;
            }
        }
        // The iterator should have size >= 2, since we use different chunk sizes for the two uploads
        Assert.assertTrue(iterSize >= 2);
        // The sum of all chunk sizes should equal content size * 2
        // (one failed and one successful upload)
        Assert.assertEquals(content.length * 2, sum);
    } finally {
        serviceManager.stop();
        serviceManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) URI(java.net.URI) Notification(co.cask.cdap.proto.Notification) ServiceManager(co.cask.cdap.test.ServiceManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
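
The upload(...) helper called above is defined elsewhere in the test class and is not shown on this page. A plausible JDK-only reconstruction matching the call sites; the Content-MD5 header name and the use of the last argument as the chunked-streaming chunk size are assumptions:

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class UploadHelper {
    // Hypothetical reconstruction of the test's upload(url, content, md5, chunkSize)
    // helper: POST the content with the given (possibly wrong) MD5 header and
    // chunked streaming, returning the HTTP response code.
    static int upload(URL url, byte[] content, String md5, int chunkSize) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-MD5", md5);
            conn.setDoOutput(true);
            conn.setChunkedStreamingMode(chunkSize);
            try (OutputStream os = conn.getOutputStream()) {
                os.write(content);
            }
            return conn.getResponseCode();
        } finally {
            conn.disconnect();
        }
    }
}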

Aggregations

ApplicationManager (co.cask.cdap.test.ApplicationManager): 188 usages
Test (org.junit.Test): 155 usages
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 88 usages
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 71 usages
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 61 usages
WorkflowManager (co.cask.cdap.test.WorkflowManager): 59 usages
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 58 usages
SparkManager (co.cask.cdap.test.SparkManager): 52 usages
Table (co.cask.cdap.api.dataset.table.Table): 50 usages
ServiceManager (co.cask.cdap.test.ServiceManager): 48 usages
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 47 usages
Schema (co.cask.cdap.api.data.schema.Schema): 47 usages
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 45 usages
StreamManager (co.cask.cdap.test.StreamManager): 43 usages
URL (java.net.URL): 33 usages
HashSet (java.util.HashSet): 27 usages
ArrayList (java.util.ArrayList): 26 usages
IOException (java.io.IOException): 25 usages
HashMap (java.util.HashMap): 24 usages
Set (java.util.Set): 24 usages
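
Taken together, the usages above share one skeleton: deploy the application, obtain a program manager, start it with runtime arguments, wait for a run status, assert on datasets, and stop. A minimal sketch of that pattern using the CDAP test classes shown in these examples (MyApp and the program and dataset names are placeholders):

import co.cask.cdap.proto.ProgramRunStatus;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.SparkManager;
import co.cask.cdap.test.base.TestFrameworkTestBase;
import com.google.common.collect.ImmutableMap;
import java.util.concurrent.TimeUnit;
import org.junit.Test;

public class MySparkTest extends TestFrameworkTestBase {
    @Test
    public void testPattern() throws Exception {
        // 1. Deploy the application and get its manager (MyApp is a placeholder).
        ApplicationManager appManager = deployApplication(MyApp.class);
        // 2. Start a program with runtime arguments.
        SparkManager spark = appManager.getSparkManager("MySpark")
            .start(ImmutableMap.of("input", "SomeStream", "output", "SomeTable"));
        // 3. Block until the run reaches a terminal status.
        spark.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
        // 4. Assert on datasets via getDataset(...), then stop the program
        //    (and wait for KILLED) if it is still running.
    }
}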