Search in sources :

Example 6 with ApplicationManager

use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

the class SparkTestRun method testClassicSpark.

@Test
public void testClassicSpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    for (Class<?> sparkClass : Arrays.asList(TestSparkApp.ClassicSpark.class, TestSparkApp.ScalaClassicSpark.class)) {
        final SparkManager sparkManager = appManager.getSparkManager(sparkClass.getSimpleName()).start();
        sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    }
    KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
    Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ClassicSparkProgram.class.getName())));
    Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ScalaClassicSparkProgram.class.getName())));
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) ScalaClassicSparkProgram(co.cask.cdap.spark.app.ScalaClassicSparkProgram) ScalaClassicSparkProgram(co.cask.cdap.spark.app.ScalaClassicSparkProgram) ClassicSparkProgram(co.cask.cdap.spark.app.ClassicSparkProgram) Test(org.junit.Test)

Example 7 with ApplicationManager

use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

the class SparkTestRun method testScalaSparkCrossNSDataset.

@Test
public void testScalaSparkCrossNSDataset() throws Exception {
    // Deploy and create a dataset in namespace datasetSpaceForSpark
    NamespaceMeta inputDSNSMeta = new NamespaceMeta.Builder().setName("datasetSpaceForSpark").build();
    getNamespaceAdmin().create(inputDSNSMeta);
    deploy(inputDSNSMeta.getNamespaceId(), SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset(inputDSNSMeta.getNamespaceId().dataset("keys"));
    prepareInputData(keysManager);
    Map<String, String> args = ImmutableMap.of(ScalaCharCountProgram.INPUT_DATASET_NAMESPACE(), inputDSNSMeta.getNamespaceId().getNamespace(), ScalaCharCountProgram.INPUT_DATASET_NAME(), "keys");
    ApplicationManager applicationManager = deploy(SparkAppUsingObjectStore.class);
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
}
Also used : ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Example 8 with ApplicationManager

use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

the class SparkTestRun method testSparkWithGetDataset.

private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
    ApplicationManager applicationManager = deploy(appClass);
    DataSetManager<FileSet> filesetManager = getDataset("logs");
    FileSet fileset = filesetManager.get();
    Location location = fileset.getLocation("nn");
    prepareInputFileSetWithLogData(location);
    Map<String, String> inputArgs = new HashMap<>();
    FileSetArguments.setInputPath(inputArgs, "nn");
    Map<String, String> args = new HashMap<>();
    args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
    args.put("input", "logs");
    args.put("output", "logStats");
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
    KeyValueTable logStatsTable = logStatsManager.get();
    validateGetDatasetOutput(logStatsTable);
    // Cleanup after run
    location.delete(true);
    logStatsManager.flush();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
        while (scan.hasNext()) {
            logStatsTable.delete(scan.next().getKey());
        }
    }
    logStatsManager.flush();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) FileSet(co.cask.cdap.api.dataset.lib.FileSet) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Location(org.apache.twill.filesystem.Location)

Example 9 with ApplicationManager

use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

the class SparkTestRun method testStreamFormatSpec.

@Test
public void testStreamFormatSpec() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);
    StreamManager stream = getStreamManager("PeopleStream");
    stream.send("Old Man,50");
    stream.send("Baby,1");
    stream.send("Young Guy,18");
    stream.send("Small Kid,5");
    stream.send("Legal Drinker,21");
    Map<String, String> outputArgs = new HashMap<>();
    FileSetArguments.setOutputPath(outputArgs, "output");
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "PeopleFileSet", outputArgs));
    runtimeArgs.put("stream.name", "PeopleStream");
    runtimeArgs.put("output.dataset", "PeopleFileSet");
    runtimeArgs.put("sql.statement", "SELECT name, age FROM people WHERE age >= 21");
    List<String> programs = Arrays.asList(ScalaStreamFormatSpecSpark.class.getSimpleName(), StreamFormatSpecSpark.class.getSimpleName());
    for (String sparkProgramName : programs) {
        // Clean the output before starting
        DataSetManager<FileSet> fileSetManager = getDataset("PeopleFileSet");
        Location outputDir = fileSetManager.get().getLocation("output");
        outputDir.delete(true);
        SparkManager sparkManager = appManager.getSparkManager(sparkProgramName);
        sparkManager.start(runtimeArgs);
        sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
        // Find the output part file. There is only one because the program repartition to 1
        Location outputFile = Iterables.find(outputDir.list(), new Predicate<Location>() {

            @Override
            public boolean apply(Location input) {
                return input.getName().startsWith("part-r-");
            }
        });
        // Verify the result
        List<String> lines = CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputFile), Charsets.UTF_8));
        Map<String, Integer> result = new HashMap<>();
        for (String line : lines) {
            String[] parts = line.split(":");
            result.put(parts[0], Integer.parseInt(parts[1]));
        }
        Assert.assertEquals(ImmutableMap.of("Old Man", 50, "Legal Drinker", 21), result);
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) FileSet(co.cask.cdap.api.dataset.lib.FileSet) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) ScalaStreamFormatSpecSpark(co.cask.cdap.spark.app.ScalaStreamFormatSpecSpark) StreamFormatSpecSpark(co.cask.cdap.spark.app.StreamFormatSpecSpark) StreamManager(co.cask.cdap.test.StreamManager) ScalaStreamFormatSpecSpark(co.cask.cdap.spark.app.ScalaStreamFormatSpecSpark) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 10 with ApplicationManager

use of co.cask.cdap.test.ApplicationManager in project cdap by caskdata.

the class SparkTestRun method testScalaSparkWithObjectStore.

@Test
public void testScalaSparkWithObjectStore() throws Exception {
    ApplicationManager applicationManager = deploy(SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
    prepareInputData(keysManager);
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Aggregations

ApplicationManager (co.cask.cdap.test.ApplicationManager)188 Test (org.junit.Test)155 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)88 ApplicationId (co.cask.cdap.proto.id.ApplicationId)71 AppRequest (co.cask.cdap.proto.artifact.AppRequest)61 WorkflowManager (co.cask.cdap.test.WorkflowManager)59 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)58 SparkManager (co.cask.cdap.test.SparkManager)52 Table (co.cask.cdap.api.dataset.table.Table)50 ServiceManager (co.cask.cdap.test.ServiceManager)48 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)47 Schema (co.cask.cdap.api.data.schema.Schema)47 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)45 StreamManager (co.cask.cdap.test.StreamManager)43 URL (java.net.URL)33 HashSet (java.util.HashSet)27 ArrayList (java.util.ArrayList)26 IOException (java.io.IOException)25 HashMap (java.util.HashMap)24 Set (java.util.Set)24