Search in sources :

Example 6 with SparkManager

use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

the class SparkTestRun method testScalaSparkWithObjectStore.

/**
 * Deploys {@code SparkAppUsingObjectStore}, seeds the "keys" dataset, starts the
 * {@code ScalaCharCountProgram} Spark program and waits up to one minute for one
 * COMPLETED run before checking the contents of the "count" dataset.
 *
 * @throws Exception if deployment, the program run, or verification fails
 */
@Test
public void testScalaSparkWithObjectStore() throws Exception {
    ApplicationManager appManager = deploy(SparkAppUsingObjectStore.class);

    // Seed the input dataset before the program is started.
    DataSetManager<ObjectStore<String>> inputKeys = getDataset("keys");
    prepareInputData(inputKeys);

    // The Spark program is looked up by the simple name of its class.
    String programName = ScalaCharCountProgram.class.getSimpleName();
    SparkManager runningSpark = appManager.getSparkManager(programName).start();
    runningSpark.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);

    // Verify what the program wrote.
    DataSetManager<KeyValueTable> counts = getDataset("count");
    checkOutputData(counts);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Example 7 with SparkManager

use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

the class SparkFileSetTestRun method testSparkWithPartitionedFileSet.

/**
 * Runs the given Spark program with the "pfs" partitioned file set as both input and output.
 *
 * <p>An input partition keyed {@code x=nn} is prepared and registered, the program is started
 * with an input partition filter (range condition on {@code x} with bounds "na" and "nx") and
 * an output partition key {@code x=xx}, and the output partition is validated once the run
 * completes. Both partitions are dropped afterwards.
 *
 * @param applicationManager manager of the deployed application that contains the program
 * @param sparkProgram name of the Spark program to start
 * @throws Exception if preparing input, running the program, or validating output fails
 */
private void testSparkWithPartitionedFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
    DataSetManager<PartitionedFileSet> datasetManager = getDataset("pfs");
    PartitionedFileSet partitioned = datasetManager.get();

    // Write the input file under partition x=nn, then register the partition.
    PartitionKey inputKey = PartitionKey.builder().addStringField("x", "nn").build();
    PartitionOutput inputPartition = partitioned.getPartitionOutput(inputKey);
    Location inputLocation = inputPartition.getLocation();
    prepareFileInput(inputLocation);
    inputPartition.addPartition();
    datasetManager.flush();

    // Dataset arguments selecting which partitions are read.
    PartitionFilter inputFilter = PartitionFilter.builder().addRangeCondition("x", "na", "nx").build();
    Map<String, String> readArgs = new HashMap<>();
    PartitionedFileSetArguments.setInputPartitionFilter(readArgs, inputFilter);

    // Dataset arguments naming the partition that is written.
    PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build();
    Map<String, String> writeArgs = new HashMap<>();
    PartitionedFileSetArguments.setOutputPartitionKey(writeArgs, outputKey);

    // Scope both argument sets to the "pfs" dataset and merge them into the runtime arguments.
    Map<String, String> runtimeArgs = new HashMap<>();
    runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", readArgs));
    runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", writeArgs));
    runtimeArgs.put("input", "pfs");
    runtimeArgs.put("output", "pfs");

    SparkManager runningSpark = applicationManager.getSparkManager(sparkProgram).start(runtimeArgs);
    runningSpark.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);

    datasetManager.flush();
    PartitionDetail outputPartition = partitioned.getPartition(outputKey);
    Assert.assertNotNull(outputPartition);
    validateFileOutput(outputPartition.getLocation());

    // Cleanup after test completed
    partitioned.dropPartition(inputPartition.getPartitionKey());
    partitioned.dropPartition(outputPartition.getPartitionKey());
    datasetManager.flush();
}
Also used : SparkManager(co.cask.cdap.test.SparkManager) PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) HashMap(java.util.HashMap) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) Location(org.apache.twill.filesystem.Location)

Example 8 with SparkManager

use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

the class SparkFileSetTestRun method testSparkWithFileSet.

/**
 * Runs the given Spark program with the "fs" file set as both input and output.
 *
 * <p>Input is prepared under the relative path "nn", the program is started with input path
 * "nn" and output path "xx", and the output under "xx" is validated once the run completes.
 * Both paths are deleted afterwards.
 *
 * @param applicationManager manager of the deployed application that contains the program
 * @param sparkProgram name of the Spark program to start
 * @throws Exception if preparing input, running the program, or validating output fails
 */
private void testSparkWithFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
    DataSetManager<FileSet> filesetManager = getDataset("fs");
    FileSet fileset = filesetManager.get();
    Location location = fileset.getLocation("nn");
    prepareFileInput(location);

    Map<String, String> inputArgs = new HashMap<>();
    FileSetArguments.setInputPath(inputArgs, "nn");
    Map<String, String> outputArgs = new HashMap<>();
    // The output path belongs in outputArgs; it was previously written into inputArgs,
    // which left outputArgs empty. Both maps are given the same "fs" dataset scope and
    // merged below, so the merged runtime arguments are expected to be unchanged.
    FileSetArguments.setOutputPath(outputArgs, "xx");

    Map<String, String> args = new HashMap<>();
    args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", inputArgs));
    args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", outputArgs));
    args.put("input", "fs");
    args.put("output", "fs");

    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    validateFileOutput(fileset.getLocation("xx"), "custom:");

    // Cleanup paths after running test
    fileset.getLocation("nn").delete(true);
    fileset.getLocation("xx").delete(true);
}
Also used : SparkManager(co.cask.cdap.test.SparkManager) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) SparkAppUsingFileSet(co.cask.cdap.spark.app.SparkAppUsingFileSet) FileSet(co.cask.cdap.api.dataset.lib.FileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) HashMap(java.util.HashMap) Location(org.apache.twill.filesystem.Location)

Example 9 with SparkManager

use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

the class SparkTest method testClassicSpark.

/**
 * Deploys {@code TestSparkApp}, runs {@code ClassicSpark} and {@code ScalaClassicSpark} one
 * after the other (each must reach COMPLETED within five minutes), then asserts that
 * "ResultTable" holds the long value 1 under the class name of each underlying program.
 *
 * @throws Exception if deployment, a program run, or verification fails
 */
@Test
public void testClassicSpark() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);

    // Programs are started by the simple name of their Spark class, in this order.
    Class<?>[] sparkClasses = {TestSparkApp.ClassicSpark.class, TestSparkApp.ScalaClassicSpark.class};
    for (Class<?> sparkClass : sparkClasses) {
        String programName = sparkClass.getSimpleName();
        SparkManager runningSpark = appManager.getSparkManager(programName).start();
        runningSpark.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    }

    KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();

    byte[] javaResult = resultTable.read(ClassicSparkProgram.class.getName());
    Assert.assertEquals(1L, Bytes.toLong(javaResult));

    byte[] scalaResult = resultTable.read(ScalaClassicSparkProgram.class.getName());
    Assert.assertEquals(1L, Bytes.toLong(scalaResult));
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TestSparkApp(co.cask.cdap.spark.app.TestSparkApp) ScalaClassicSparkProgram(co.cask.cdap.spark.app.ScalaClassicSparkProgram) ClassicSparkProgram(co.cask.cdap.spark.app.ClassicSparkProgram) Test(org.junit.Test)

Example 10 with SparkManager

use of co.cask.cdap.test.SparkManager in project cdap by caskdata.

the class SparkTest method testDatasetSQL.

/**
 * Writes three {@code Person} records to "PersonTable", runs {@code DatasetSQLSpark} and
 * checks which "new:"-prefixed rows appear in the table afterwards.
 *
 * @throws Exception if deployment, the program run, or verification fails
 */
@Test
public void testDatasetSQL() throws Exception {
    ApplicationManager appManager = deploy(TestSparkApp.class);

    DataSetManager<ObjectMappedTable<Person>> personManager = getDataset("PersonTable");
    ObjectMappedTable<Person> people = personManager.get();

    // Seed three people aged 10, 20 and 30.
    people.write("1", new Person("Bob", 10));
    people.write("2", new Person("Bill", 20));
    people.write("3", new Person("Berry", 30));
    personManager.flush();

    String programName = DatasetSQLSpark.class.getSimpleName();
    SparkManager runningSpark = appManager.getSparkManager(programName).start();
    runningSpark.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

    // The program executes "SELECT * FROM Person WHERE age > 10", hence expected two new entries for Bill and Berry.
    personManager.flush();

    Person bill = people.read("new:2");
    Assert.assertEquals("Bill", bill.name());
    Assert.assertEquals(20, bill.age());

    Person berry = people.read("new:3");
    Assert.assertEquals("Berry", berry.name());
    Assert.assertEquals(30, berry.age());

    // Shouldn't have new Bob
    Assert.assertNull(people.read("new:1"));
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) ObjectMappedTable(co.cask.cdap.api.dataset.lib.ObjectMappedTable) Person(co.cask.cdap.spark.app.Person) Test(org.junit.Test)

Aggregations

SparkManager (co.cask.cdap.test.SparkManager): 58, ApplicationManager (co.cask.cdap.test.ApplicationManager): 52, Test (org.junit.Test): 48, KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 29, StreamManager (co.cask.cdap.test.StreamManager): 21, HashMap (java.util.HashMap): 14, ImmutableSet (com.google.common.collect.ImmutableSet): 13, Set (java.util.Set): 13, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 12, TimeoutException (java.util.concurrent.TimeoutException): 12, Schema (co.cask.cdap.api.data.schema.Schema): 10, Table (co.cask.cdap.api.dataset.table.Table): 10, ServiceManager (co.cask.cdap.test.ServiceManager): 10, IOException (java.io.IOException): 10, URL (java.net.URL): 10, Location (org.apache.twill.filesystem.Location): 10, TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException): 9, ApplicationId (co.cask.cdap.proto.id.ApplicationId): 9, HashSet (java.util.HashSet): 9, AppRequest (co.cask.cdap.proto.artifact.AppRequest): 8