Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTestRun, method testScalaSparkWithObjectStore.
@Test
public void testScalaSparkWithObjectStore() throws Exception {
  ApplicationManager applicationManager = deploy(SparkAppUsingObjectStore.class);
  DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
  prepareInputData(keysManager);

  SparkManager sparkManager =
    applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> countManager = getDataset("count");
  checkOutputData(countManager);
}
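The prepareInputData and checkOutputData helpers are defined elsewhere in SparkTestRun and are not shown here. A minimal sketch of what they might look like, assuming the Spark program counts the characters of each stored string; the sample strings and the Bytes.toInt encoding of the counts are assumptions, not taken from the source:

private void prepareInputData(DataSetManager<ObjectStore<String>> manager) {
  ObjectStore<String> keys = manager.get();
  // Store a few sample strings, keyed by their own bytes (hypothetical test data).
  keys.write(Bytes.toBytes("abc"), "abc");
  keys.write(Bytes.toBytes("xyz123"), "xyz123");
  manager.flush(); // make the writes visible to the Spark run
}

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
  KeyValueTable count = manager.get();
  // Each input string should map to its character count (assumed output encoding).
  Assert.assertEquals(3, Bytes.toInt(count.read(Bytes.toBytes("abc"))));
  Assert.assertEquals(6, Bytes.toInt(count.read(Bytes.toBytes("xyz123"))));
}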
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkFileSetTestRun, method testSparkWithPartitionedFileSet.
private void testSparkWithPartitionedFileSet(ApplicationManager applicationManager,
                                             String sparkProgram) throws Exception {
  DataSetManager<PartitionedFileSet> pfsManager = getDataset("pfs");
  PartitionedFileSet pfs = pfsManager.get();
  PartitionOutput partitionOutput =
    pfs.getPartitionOutput(PartitionKey.builder().addStringField("x", "nn").build());
  Location location = partitionOutput.getLocation();
  prepareFileInput(location);
  partitionOutput.addPartition();
  pfsManager.flush();

  Map<String, String> inputArgs = new HashMap<>();
  PartitionedFileSetArguments.setInputPartitionFilter(
    inputArgs, PartitionFilter.builder().addRangeCondition("x", "na", "nx").build());
  Map<String, String> outputArgs = new HashMap<>();
  PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build();
  PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", outputArgs));
  args.put("input", "pfs");
  args.put("output", "pfs");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);
  pfsManager.flush();

  PartitionDetail partition = pfs.getPartition(outputKey);
  Assert.assertNotNull(partition);
  validateFileOutput(partition.getLocation());

  // Clean up after the test completes.
  pfs.dropPartition(partitionOutput.getPartitionKey());
  pfs.dropPartition(partition.getPartitionKey());
  pfsManager.flush();
}
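Both the input filter and the output partition key target the same dataset name, "pfs". RuntimeArguments.addScope keeps the two argument maps from colliding by prefixing each key with the scope and dataset name, so the merged map can carry both. The illustration below uses a made-up key to show the effect; the exact key is hypothetical, and the dataset.pfs.<key> prefix shape is how CDAP scopes dataset arguments:

// Illustration with a hypothetical key, not code from the test: addScope rewrites
// the map so the argument applies only to the dataset named "pfs".
Map<String, String> scoped = RuntimeArguments.addScope(
  Scope.DATASET, "pfs", Collections.singletonMap("some.key", "some.value"));
// scoped now contains a single entry: "dataset.pfs.some.key" -> "some.value"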
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkFileSetTestRun, method testSparkWithFileSet.
private void testSparkWithFileSet(ApplicationManager applicationManager,
                                  String sparkProgram) throws Exception {
  DataSetManager<FileSet> filesetManager = getDataset("fs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareFileInput(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "xx");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", outputArgs));
  args.put("input", "fs");
  args.put("output", "fs");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
  validateFileOutput(fileset.getLocation("xx"), "custom:");

  // Clean up the paths after running the test.
  fileset.getLocation("nn").delete(true);
  fileset.getLocation("xx").delete(true);
}
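prepareFileInput and validateFileOutput are helpers defined elsewhere in SparkFileSetTestRun. A rough sketch of their shape, assuming the input is a small text file, the Spark program writes Hadoop-style part files, and the two-argument variant checks a line prefix; the file contents and the part- naming are assumptions:

private void prepareFileInput(Location location) throws IOException {
  // Write a small text file for the Spark program to read (hypothetical contents).
  try (PrintWriter out = new PrintWriter(location.getOutputStream())) {
    out.println("first line");
    out.println("second line");
  }
}

private void validateFileOutput(Location location, String prefix) throws Exception {
  // Assumes the Spark program writes Hadoop-style part files under the output location.
  Assert.assertTrue(location.isDirectory());
  for (Location child : location.list()) {
    if (child.getName().startsWith("part-")) {
      try (BufferedReader reader = new BufferedReader(
             new InputStreamReader(child.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
          // Every output record should carry the expected prefix, e.g. "custom:".
          Assert.assertTrue(line.startsWith(prefix));
        }
      }
    }
  }
}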
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testClassicSpark.
@Test
public void testClassicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  for (Class<?> sparkClass : Arrays.asList(TestSparkApp.ClassicSpark.class,
                                           TestSparkApp.ScalaClassicSpark.class)) {
    final SparkManager sparkManager = appManager.getSparkManager(sparkClass.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  }
  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ClassicSparkProgram.class.getName())));
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ScalaClassicSparkProgram.class.getName())));
}
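A follow-up check one could add after the loop, assuming the getHistory accessor on CDAP's ProgramManager: each of the two programs should then report exactly one completed run. This assertion is not part of the original test:

// Hypothetical extra assertion, assuming ProgramManager.getHistory(ProgramRunStatus).
for (Class<?> sparkClass : Arrays.asList(TestSparkApp.ClassicSpark.class,
                                         TestSparkApp.ScalaClassicSpark.class)) {
  SparkManager manager = appManager.getSparkManager(sparkClass.getSimpleName());
  Assert.assertEquals(1, manager.getHistory(ProgramRunStatus.COMPLETED).size());
}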
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkTest, method testDatasetSQL.
@Test
public void testDatasetSQL() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  DataSetManager<ObjectMappedTable<Person>> tableManager = getDataset("PersonTable");
  ObjectMappedTable<Person> table = tableManager.get();
  table.write("1", new Person("Bob", 10));
  table.write("2", new Person("Bill", 20));
  table.write("3", new Person("Berry", 30));
  tableManager.flush();

  SparkManager sparkManager = appManager.getSparkManager(DatasetSQLSpark.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  // The program executes "SELECT * FROM Person WHERE age > 10", so we expect
  // two new entries, for Bill and Berry.
  tableManager.flush();
  Person person = table.read("new:2");
  Assert.assertEquals("Bill", person.name());
  Assert.assertEquals(20, person.age());
  person = table.read("new:3");
  Assert.assertEquals("Berry", person.name());
  Assert.assertEquals(30, person.age());

  // There should be no new entry for Bob, whose age is not greater than 10.
  Assert.assertNull(table.read("new:1"));
}
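The Person class lives in the test application; the test relies only on its two-argument constructor and its name()/age() accessors. A minimal sketch of a compatible bean, with the field set and accessor style inferred from the calls above rather than taken from the source:

// Hypothetical sketch of the Person bean used above (the real one is in TestSparkApp).
public static final class Person {
  private final String name;
  private final int age;

  public Person(String name, int age) {
    this.name = name;
    this.age = age;
  }

  public String name() {
    return name;
  }

  public int age() {
    return age;
  }
}

ObjectMappedTable stores the object's fields as table columns, which is what lets DatasetSQLSpark query the dataset with SQL by field name.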