
Example 36 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From the class Spark2Test, method testScalaSparkCrossNSStream.

@Test
public void testScalaSparkCrossNSStream() throws Exception {
    // create a namespace for stream and create the stream in it
    NamespaceMeta crossNSStreamMeta = new NamespaceMeta.Builder().setName("streamSpaceForSpark").build();
    getNamespaceAdmin().create(crossNSStreamMeta);
    StreamManager streamManager = getStreamManager(crossNSStreamMeta.getNamespaceId().stream("testStream"));
    // create a namespace for dataset and add the dataset instance in it
    NamespaceMeta crossNSDatasetMeta = new NamespaceMeta.Builder().setName("crossNSDataset").build();
    getNamespaceAdmin().create(crossNSDatasetMeta);
    addDatasetInstance(crossNSDatasetMeta.getNamespaceId().dataset("count"), "keyValueTable");
    // write something to the stream
    streamManager.createStream();
    for (int i = 0; i < 50; i++) {
        streamManager.send(String.valueOf(i));
    }
    // deploy the spark app in another namespace (default)
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    Map<String, String> args = ImmutableMap.of(ScalaCrossNSProgram.STREAM_NAMESPACE(), crossNSStreamMeta.getNamespaceId().getNamespace(), ScalaCrossNSProgram.DATASET_NAMESPACE(), crossNSDatasetMeta.getNamespaceId().getNamespace(), ScalaCrossNSProgram.DATASET_NAME(), "count");
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCrossNSProgram.class.getSimpleName()).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    // get the dataset from the other namespace where we expect it to exist and compare the data
    DataSetManager<KeyValueTable> countManager = getDataset(crossNSDatasetMeta.getNamespaceId().dataset("count"));
    KeyValueTable results = countManager.get();
    for (int i = 0; i < 50; i++) {
        byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
        Assert.assertArrayEquals(key, results.read(key));
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)
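The test creates two extra namespaces ("streamSpaceForSpark" and "crossNSDataset") but this snippet does not show how they are cleaned up. A minimal tear-down sketch, assuming the NamespaceAdmin returned by getNamespaceAdmin() exposes exists(NamespaceId) and delete(NamespaceId), and that deleting a namespace also removes the stream and dataset created in it:

@After
public void cleanupCrossNSNamespaces() throws Exception {
    // Hypothetical cleanup; both namespace names match the ones created in the test above.
    NamespaceId streamSpace = new NamespaceId("streamSpaceForSpark");
    NamespaceId datasetSpace = new NamespaceId("crossNSDataset");
    if (getNamespaceAdmin().exists(streamSpace)) {
        getNamespaceAdmin().delete(streamSpace);
    }
    if (getNamespaceAdmin().exists(datasetSpace)) {
        getNamespaceAdmin().delete(datasetSpace);
    }
}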

Example 37 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From the class Spark2Test, method testScalaSparkCrossNSDataset.

@Test
public void testScalaSparkCrossNSDataset() throws Exception {
    // Deploy and create a dataset in namespace datasetSpaceForSpark
    NamespaceMeta inputDSNSMeta = new NamespaceMeta.Builder().setName("datasetSpaceForSpark").build();
    getNamespaceAdmin().create(inputDSNSMeta);
    deploy(inputDSNSMeta.getNamespaceId(), SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset(inputDSNSMeta.getNamespaceId().dataset("keys"));
    prepareInputData(keysManager);
    Map<String, String> args = ImmutableMap.of(ScalaCharCountProgram.INPUT_DATASET_NAMESPACE(), inputDSNSMeta.getNamespaceId().getNamespace(), ScalaCharCountProgram.INPUT_DATASET_NAME(), "keys");
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
}
Also used : ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)
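The helpers prepareInputData and checkOutputData belong to Spark2Test but are not shown in this snippet. A sketch of what they plausibly do, assuming the "keys" ObjectStore holds plain strings, the char-count program writes the length of each input string into the "count" KeyValueTable, and co.cask.cdap.api.common.Bytes is available; the sample strings are placeholders:

private static final String TEST_STRING_1 = "persisted data";
private static final String TEST_STRING_2 = "distributed systems";

// Hypothetical: seed the "keys" ObjectStore with two sample strings.
private void prepareInputData(DataSetManager<ObjectStore<String>> manager) {
    ObjectStore<String> keys = manager.get();
    keys.write(Bytes.toBytes(TEST_STRING_1), TEST_STRING_1);
    keys.write(Bytes.toBytes(TEST_STRING_2), TEST_STRING_2);
    manager.flush();
}

// Hypothetical: verify that each input string was mapped to its character count.
private void checkOutputData(DataSetManager<KeyValueTable> manager) {
    KeyValueTable count = manager.get();
    byte[] result = count.read(Bytes.toBytes(TEST_STRING_1));
    Assert.assertNotNull(result);
    Assert.assertEquals(TEST_STRING_1.length(), Bytes.toLong(result));
    result = count.read(Bytes.toBytes(TEST_STRING_2));
    Assert.assertNotNull(result);
    Assert.assertEquals(TEST_STRING_2.length(), Bytes.toLong(result));
}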

Example 38 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From the class Spark2Test, method testSparkWithObjectStore.

@Test
public void testSparkWithObjectStore() throws Exception {
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
    prepareInputData(keysManager);
    SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
    // validate that the table emitted metrics
    // one read + one write in beforeSubmit(), increment (= read + write) in main -> 4
    Tasks.waitFor(4L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE, "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM, ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(), Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(), Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(), Constants.Metrics.Tag.DATASET, "totals"), Collections.<String>emptyList()));
            if (metrics.isEmpty()) {
                return 0L;
            }
            Assert.assertEquals(1, metrics.size());
            MetricTimeSeries ts = metrics.iterator().next();
            Assert.assertEquals(1, ts.getTimeValues().size());
            return ts.getTimeValues().get(0).getValue();
        }
    }, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) SparkManager(co.cask.cdap.test.SparkManager) MetricTimeSeries(co.cask.cdap.api.metrics.MetricTimeSeries) IOException(java.io.IOException) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Collection(java.util.Collection) MetricDataQuery(co.cask.cdap.api.metrics.MetricDataQuery) Test(org.junit.Test)
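The expected value of 4 comes from one read and one write of the "totals" dataset in beforeSubmit() plus an increment (a read and a write) in the Spark program itself. For readability, the metric lookup could be factored into a helper built from the same MetricDataQuery call used above; only the helper name is made up:

// Sketch: sum of dataset operations recorded for one dataset of this Spark program.
private long getDatasetOpCount(String app, String spark, String dataset) {
    Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(
        0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE,
        "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM,
        ImmutableMap.of(
            Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(),
            Constants.Metrics.Tag.APP, app,
            Constants.Metrics.Tag.SPARK, spark,
            Constants.Metrics.Tag.DATASET, dataset),
        Collections.<String>emptyList()));
    if (metrics.isEmpty()) {
        return 0L;
    }
    return metrics.iterator().next().getTimeValues().get(0).getValue();
}

With that helper, the Tasks.waitFor callable reduces to returning getDatasetOpCount(SparkAppUsingObjectStore.class.getSimpleName(), CharCountProgram.class.getSimpleName(), "totals").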

Example 39 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From the class Spark2Test, method testSparkWithLocalFiles.

private void testSparkWithLocalFiles(Class<? extends Application> appClass, String sparkProgram, String prefix) throws Exception {
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, appClass);
    URI localFile = createLocalPropertiesFile(prefix);
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
    KeyValueTable kvTable = kvTableManager.get();
    Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
    List<byte[]> deleteKeys = new ArrayList<>();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
        for (int i = 0; i < 3; i++) {
            KeyValue<byte[], byte[]> next = scan.next();
            Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
            deleteKeys.add(next.getKey());
        }
        Assert.assertFalse(scan.hasNext());
    }
    // Cleanup after run
    kvTableManager.flush();
    for (byte[] key : deleteKeys) {
        kvTable.delete(key);
    }
    kvTableManager.flush();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) ArrayList(java.util.ArrayList) URI(java.net.URI) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable)
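createLocalPropertiesFile is another helper that is not shown. Given that the test expects the pairs a=1, b=2 and c=3 to end up in the output KeyValueTable, a plausible sketch using only the JDK; the file contents and naming are assumptions:

// Hypothetical: write a small properties file that the Spark program is expected
// to read as a localized file and copy into the output dataset.
private URI createLocalPropertiesFile(String prefix) throws IOException {
    java.nio.file.Path file = java.nio.file.Files.createTempFile(prefix, ".properties");
    java.nio.file.Files.write(file, java.util.Arrays.asList("a=1", "b=2", "c=3"),
                              java.nio.charset.StandardCharsets.UTF_8);
    return file.toUri();
}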

Example 40 with SparkManager

Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

From the class SpamClassifierTest, method test.

@Test
public void test() throws Exception {
    // Deploy the KafkaIngestionApp application
    ApplicationManager appManager = deployApplication(SpamClassifier.class);
    ingestTrainingData();
    publishKafkaMessages();
    // start spark streaming program
    SparkManager sparkManager = appManager.getSparkManager(SpamClassifierProgram.class.getSimpleName());
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.put("kafka.brokers", "127.0.0.1:" + kafkaPort);
    runtimeArgs.put("kafka.topics", KAFKA_TOPIC);
    sparkManager.start(runtimeArgs);
    // Start and wait for service to start
    final ServiceManager serviceManager = appManager.getServiceManager(SpamClassifier.SERVICE_HANDLER).start();
    serviceManager.waitForStatus(true);
    // wait for spark streaming program to write to dataset
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            return testClassification(serviceManager, "1", SpamClassifier.SpamClassifierServiceHandler.SPAM) && testClassification(serviceManager, "2", SpamClassifier.SpamClassifierServiceHandler.HAM);
        }
    }, 60, TimeUnit.SECONDS);
    // stop spark program
    sparkManager.stop();
    sparkManager.waitForRun(ProgramRunStatus.KILLED, 1, TimeUnit.MINUTES);
    appManager.stopAll();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) HashMap(java.util.HashMap) ServiceManager(co.cask.cdap.test.ServiceManager) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)
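ingestTrainingData, publishKafkaMessages and testClassification are private helpers of SpamClassifierTest and are not included here. As a rough sketch, testClassification could call the handler over HTTP and compare the returned label, assuming the service exposes a GET endpoint of the form predict/<message-id> that returns the label as plain text (the path and response format are assumptions; BufferedReader, InputStreamReader and StandardCharsets come from the JDK):

// Hypothetical: ask the classifier service for its verdict on one Kafka message id.
private boolean testClassification(ServiceManager serviceManager, String messageId,
                                   String expected) throws IOException {
    URL url = new URL(serviceManager.getServiceURL(), "predict/" + messageId);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
        if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
            return false;
        }
        try (BufferedReader reader = new BufferedReader(
                 new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line = reader.readLine();
            return line != null && expected.equals(line.trim());
        }
    } finally {
        conn.disconnect();
    }
}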

Aggregations

SparkManager (co.cask.cdap.test.SparkManager): 58
ApplicationManager (co.cask.cdap.test.ApplicationManager): 52
Test (org.junit.Test): 48
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 29
StreamManager (co.cask.cdap.test.StreamManager): 21
HashMap (java.util.HashMap): 14
ImmutableSet (com.google.common.collect.ImmutableSet): 13
Set (java.util.Set): 13
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 12
TimeoutException (java.util.concurrent.TimeoutException): 12
Schema (co.cask.cdap.api.data.schema.Schema): 10
Table (co.cask.cdap.api.dataset.table.Table): 10
ServiceManager (co.cask.cdap.test.ServiceManager): 10
IOException (java.io.IOException): 10
URL (java.net.URL): 10
Location (org.apache.twill.filesystem.Location): 10
TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException): 9
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 9
HashSet (java.util.HashSet): 9
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 8