Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkKMeansAppTest, method test().
@Test
public void test() throws Exception {
  // Deploy the Application
  ApplicationManager appManager = deployApplication(SparkKMeansApp.class);
  // Start the Flow
  FlowManager flowManager = appManager.getFlowManager("PointsFlow").start();
  // Send a few points to the stream
  StreamManager streamManager = getStreamManager("pointsStream");
  // one cluster around (0, 500, 0) and another around (100, 0, 0)
  for (int i = 0; i < 100; i++) {
    double diff = Math.random() / 100;
    streamManager.send(String.format("%f %f %f", diff, 500 + diff, diff));
    streamManager.send(String.format("%f %f %f", 100 + diff, diff, diff));
  }
  // Wait for the events to be processed, or at most 10 seconds
  RuntimeMetrics metrics = flowManager.getFlowletMetrics("reader");
  metrics.waitForProcessed(200, 10, TimeUnit.SECONDS);
  // Start the Spark program
  SparkManager sparkManager = appManager.getSparkManager("SparkKMeansProgram").start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
  flowManager.stop();
  // Start the CentersService
  ServiceManager serviceManager = appManager.getServiceManager(SparkKMeansApp.CentersService.SERVICE_NAME).start();
  // Wait for the service to start up
  serviceManager.waitForStatus(true);
  // Request data and verify it
  String response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/0"));
  String[] coordinates = response.split(",");
  int x0 = Double.valueOf(coordinates[0]).intValue();
  int y0 = Double.valueOf(coordinates[1]).intValue();
  int z0 = Double.valueOf(coordinates[2]).intValue();
  response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/1"));
  coordinates = response.split(",");
  int x1 = Double.valueOf(coordinates[0]).intValue();
  int y1 = Double.valueOf(coordinates[1]).intValue();
  int z1 = Double.valueOf(coordinates[2]).intValue();
  // One cluster should be around (0, 500, 0) and the other around (100, 0, 0)
  if (x0 == 100) {
    Assert.assertEquals(0, y0);
    Assert.assertEquals(0, z0);
    Assert.assertEquals(0, x1);
    Assert.assertEquals(500, y1);
    Assert.assertEquals(0, z1);
  } else {
    Assert.assertEquals(0, x0);
    Assert.assertEquals(500, y0);
    Assert.assertEquals(0, z0);
    Assert.assertEquals(100, x1);
    Assert.assertEquals(0, y1);
    Assert.assertEquals(0, z1);
  }
  // Request data by an incorrect index and verify the response
  URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "centers/10");
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    Assert.assertEquals(HttpURLConnection.HTTP_NO_CONTENT, conn.getResponseCode());
  } finally {
    conn.disconnect();
  }
}
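The requestService helper called above is not part of this snippet. A minimal sketch of such a helper, assuming it only needs to read the response body as a UTF-8 string (Guava's ByteStreams and Charsets are assumed to be on the classpath):

private String requestService(URL url) throws IOException {
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    // Expect a successful response before reading the body
    Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
    return new String(ByteStreams.toByteArray(conn.getInputStream()), Charsets.UTF_8);
  } finally {
    conn.disconnect();
  }
}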
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class AuthorizationTest, method testSparkStreamAuth().
@Test
@Category(SlowTests.class)
public void testSparkStreamAuth() throws Exception {
  createAuthNamespace();
  Authorizer authorizer = getAuthorizer();
  setUpPrivilegeToDeployStreamAuthApp();
  StreamId streamId = AUTH_NAMESPACE.stream(StreamAuthApp.STREAM);
  Map<EntityId, Set<Action>> additionalPrivileges = ImmutableMap.<EntityId, Set<Action>>builder()
    .put(streamId, EnumSet.of(Action.READ, Action.WRITE))
    .put(AUTH_NAMESPACE.app(StreamAuthApp.APP).spark(StreamAuthApp.SPARK), EnumSet.of(Action.EXECUTE))
    .put(AUTH_NAMESPACE.dataset(StreamAuthApp.KVTABLE), EnumSet.of(Action.READ, Action.WRITE))
    .build();
  setUpPrivilegeAndRegisterForDeletion(ALICE, additionalPrivileges);
  ApplicationManager appManager = deployApplication(AUTH_NAMESPACE, StreamAuthApp.class);
  StreamManager streamManager = getStreamManager(streamId);
  streamManager.send("Hello");
  final SparkManager sparkManager = appManager.getSparkManager(StreamAuthApp.SPARK);
  sparkManager.start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
  DataSetManager<KeyValueTable> kvManager = getDataset(AUTH_NAMESPACE.dataset(StreamAuthApp.KVTABLE));
  try (KeyValueTable kvTable = kvManager.get()) {
    byte[] value = kvTable.read("Hello");
    Assert.assertArrayEquals(Bytes.toBytes("Hello"), value);
  }
  streamManager.send("World");
  // Revoke READ permission on the stream from ALICE; the next run should fail
  authorizer.revoke(Authorizable.fromEntityId(streamId), ALICE, EnumSet.of(Action.READ));
  sparkManager.start();
  sparkManager.waitForRun(ProgramRunStatus.FAILED, 1, TimeUnit.MINUTES);
  kvManager = getDataset(AUTH_NAMESPACE.dataset(StreamAuthApp.KVTABLE));
  try (KeyValueTable kvTable = kvManager.get()) {
    byte[] value = kvTable.read("World");
    Assert.assertNull(value);
  }
  // Grant ALICE READ permission on the stream; now the Spark job should run successfully
  authorizer.grant(Authorizable.fromEntityId(streamId), ALICE, ImmutableSet.of(Action.READ));
  sparkManager.start();
  sparkManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 1, TimeUnit.MINUTES);
  kvManager = getDataset(AUTH_NAMESPACE.dataset(StreamAuthApp.KVTABLE));
  try (KeyValueTable kvTable = kvManager.get()) {
    byte[] value = kvTable.read("World");
    Assert.assertArrayEquals(Bytes.toBytes("World"), value);
  }
  appManager.delete();
}
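waitForRuns blocks until the program has the requested number of runs in the given state. If desired, the final run counts could also be asserted directly through the run history, as the file-set example further below does; a small sketch of such an extra check:

// After the grant, two runs should have completed and exactly one (the revoked run) should have failed
Assert.assertEquals(2, sparkManager.getHistory(ProgramRunStatus.COMPLETED).size());
Assert.assertEquals(1, sparkManager.getHistory(ProgramRunStatus.FAILED).size());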
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class AuthorizationTest, method testCrossNSSpark().
@Test
public void testCrossNSSpark() throws Exception {
  createAuthNamespace();
  ApplicationId appId = AUTH_NAMESPACE.app(TestSparkCrossNSDatasetApp.APP_NAME);
  Map<EntityId, Set<Action>> neededPrivileges = ImmutableMap.<EntityId, Set<Action>>builder()
    .put(appId, EnumSet.of(Action.ADMIN))
    .put(AUTH_NAMESPACE.artifact(TestSparkCrossNSDatasetApp.class.getSimpleName(), "1.0-SNAPSHOT"), EnumSet.of(Action.ADMIN))
    .put(AUTH_NAMESPACE.dataset(TestSparkCrossNSDatasetApp.DEFAULT_OUTPUT_DATASET), EnumSet.of(Action.ADMIN))
    .put(AUTH_NAMESPACE.datasetType(KeyValueTable.class.getName()), EnumSet.of(Action.ADMIN))
    .build();
  setUpPrivilegeAndRegisterForDeletion(ALICE, neededPrivileges);
  ProgramId programId = appId.spark(TestSparkCrossNSDatasetApp.SPARK_PROGRAM_NAME);
  // BOB will be executing the program
  grantAndAssertSuccess(programId, BOB, EnumSet.of(Action.EXECUTE));
  cleanUpEntities.add(programId);
  ApplicationManager appManager = deployApplication(AUTH_NAMESPACE, TestSparkCrossNSDatasetApp.class);
  SparkManager sparkManager =
    appManager.getSparkManager(TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.class.getSimpleName());
  testCrossNSSystemDatasetAccessWithAuthSpark(sparkManager);
  testCrossNSDatasetAccessWithAuthSpark(sparkManager);
}
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkFileSetTestRun, method testSparkWithTimePartitionedFileSet().
private void testSparkWithTimePartitionedFileSet(ApplicationManager applicationManager,
                                                 String sparkProgram) throws Exception {
  long customOutputPartitionKey = 123456789L;
  long customInputPartitionKey = 987654321L;
  DataSetManager<TimePartitionedFileSet> tpfsManager = getDataset("tpfs");
  long inputTime = System.currentTimeMillis();
  long outputTime = inputTime + TimeUnit.HOURS.toMillis(1);
  addTimePartition(tpfsManager, inputTime);
  addTimePartition(tpfsManager, customInputPartitionKey);
  Map<String, String> inputArgs = new HashMap<>();
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, inputTime - 100);
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, inputTime + 100);
  Map<String, String> outputArgs = new HashMap<>();
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, outputTime);
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "tpfs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "tpfs", outputArgs));
  args.put("input", "tpfs");
  args.put("output", "tpfs");
  args.put("outputKey", String.valueOf(customOutputPartitionKey));
  args.put("inputKey", String.valueOf(customInputPartitionKey));
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);
  tpfsManager.flush();
  TimePartitionedFileSet tpfs = tpfsManager.get();
  PartitionDetail partition = tpfs.getPartitionByTime(outputTime);
  Assert.assertNotNull("Output partition is null when running without custom dataset arguments", partition);
  validateFileOutput(partition.getLocation());
  PartitionDetail customPartition = tpfs.getPartitionByTime(customOutputPartitionKey);
  Assert.assertNotNull("Output partition is null when running with custom dataset arguments", customPartition);
  validateFileOutput(customPartition.getLocation());
  // Clean up after running the test
  tpfs.dropPartition(inputTime);
  tpfs.dropPartition(customInputPartitionKey);
  tpfs.dropPartition(partition.getPartitionKey());
  tpfs.dropPartition(customPartition.getPartitionKey());
  tpfsManager.flush();
}
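The RuntimeArguments.addScope calls above namespace each argument to the tpfs dataset, which is what lets a single runtime-argument map carry settings for both the input and the output of the same dataset. A minimal sketch of the expected key shape (the inner key name here is illustrative, not the exact key written by TimePartitionedFileSetArguments):

Map<String, String> raw = ImmutableMap.of("output.partition.time", "1234567890");
Map<String, String> scoped = RuntimeArguments.addScope(Scope.DATASET, "tpfs", raw);
// Each key in "scoped" is expected to be prefixed with the scope and dataset name,
// e.g. something like "dataset.tpfs.output.partition.time" -> "1234567890"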
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
The class SparkFileSetTestRun, method testSparkWithCustomFileSet().
private void testSparkWithCustomFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
  DataSetManager<SparkAppUsingFileSet.MyFileSet> myFileSetManager = getDataset("myfs");
  SparkAppUsingFileSet.MyFileSet myfileset = myFileSetManager.get();
  FileSet fileset = myfileset.getEmbeddedFileSet();
  Location location = fileset.getLocation("nn");
  prepareFileInput(location);
  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "xx");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "myfs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "myfs", outputArgs));
  args.put("input", "myfs");
  args.put("output", "myfs");
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
  Assert.assertEquals(1, sparkManager.getHistory(ProgramRunStatus.COMPLETED).size());
  validateFileOutput(fileset.getLocation("xx"));
  // Verify that onSuccess() was called and onFailure() was not
  Assert.assertTrue(myfileset.getSuccessLocation().exists());
  Assert.assertFalse(myfileset.getFailureLocation().exists());
  myfileset.getSuccessLocation().delete();
  // Run the program again. It should fail due to the existing output.
  sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.FAILED, 2, TimeUnit.MINUTES);
  // Verify that onFailure() was called and onSuccess() was not
  Assert.assertFalse(myfileset.getSuccessLocation().exists());
  Assert.assertTrue(myfileset.getFailureLocation().exists());
  // Clean up the paths after running the Spark program
  fileset.getLocation("nn").delete(true);
  fileset.getLocation("xx").delete(true);
  myfileset.getSuccessLocation().delete(true);
  myfileset.getFailureLocation().delete(true);
}
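The prepareFileInput and validateFileOutput helpers are defined elsewhere in SparkFileSetTestRun and are not shown in this snippet. As a rough sketch of the kind of setup prepareFileInput performs, assuming it only needs to write a few lines of text into the given Location:

private void prepareFileInput(Location location) throws IOException {
  try (PrintStream out = new PrintStream(location.getOutputStream(), true, "UTF-8")) {
    // Illustrative content only; the real helper writes whatever the Spark program expects to read
    out.println("13 characters");
    out.println("7 chars");
  }
}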