Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From the class TestFrameworkTestRun, method testAppWithPlugin:
@Test
public void testAppWithPlugin() throws Exception {
ArtifactId artifactId = NamespaceId.DEFAULT.artifact("app-with-plugin", "1.0.0-SNAPSHOT");
addAppArtifact(artifactId, AppWithPlugin.class);
ArtifactId pluginArtifactId = NamespaceId.DEFAULT.artifact("test-plugin", "1.0.0-SNAPSHOT");
addPluginArtifact(pluginArtifactId, artifactId, ToStringPlugin.class);
ApplicationId appId = NamespaceId.DEFAULT.app("AppWithPlugin");
AppRequest createRequest = new AppRequest(new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()));
ApplicationManager appManager = deployApplication(appId, createRequest);
final WorkerManager workerManager = appManager.getWorkerManager(AppWithPlugin.WORKER);
workerManager.start();
workerManager.waitForStatus(false, 5, 1);
Tasks.waitFor(false, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return workerManager.getHistory(ProgramRunStatus.COMPLETED).isEmpty();
}
}, 5, TimeUnit.SECONDS, 10, TimeUnit.MILLISECONDS);
final ServiceManager serviceManager = appManager.getServiceManager(AppWithPlugin.SERVICE);
serviceManager.start();
serviceManager.waitForStatus(true, 1, 10);
URL serviceURL = serviceManager.getServiceURL(5, TimeUnit.SECONDS);
callServiceGet(serviceURL, "dummy");
serviceManager.stop();
serviceManager.waitForStatus(false, 1, 10);
Tasks.waitFor(false, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return serviceManager.getHistory(ProgramRunStatus.KILLED).isEmpty();
}
}, 5, TimeUnit.SECONDS, 10, TimeUnit.MILLISECONDS);
WorkflowManager workflowManager = appManager.getWorkflowManager(AppWithPlugin.WORKFLOW);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
List<RunRecord> runRecords = workflowManager.getHistory();
Assert.assertNotEquals(ProgramRunStatus.FAILED, runRecords.get(0).getStatus());
DataSetManager<KeyValueTable> workflowTableManager = getDataset(AppWithPlugin.WORKFLOW_TABLE);
String value = Bytes.toString(workflowTableManager.get().read("val"));
Assert.assertEquals(AppWithPlugin.TEST, value);
Map<String, String> workflowTags = ImmutableMap.of(
  Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
  Constants.Metrics.Tag.APP, "AppWithPlugin",
  Constants.Metrics.Tag.WORKFLOW, AppWithPlugin.WORKFLOW,
  Constants.Metrics.Tag.RUN_ID, runRecords.get(0).getPid());
getMetricsManager().waitForTotalMetricCount(workflowTags, String.format("user.destroy.%s", AppWithPlugin.WORKFLOW), 1, 60, TimeUnit.SECONDS);
// Test the Spark plugins. First send some data to the stream for the Spark program to process.
StreamManager streamManager = getStreamManager(AppWithPlugin.SPARK_STREAM);
for (int i = 0; i < 5; i++) {
streamManager.send("Message " + i);
}
SparkManager sparkManager = appManager.getSparkManager(AppWithPlugin.SPARK).start();
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
// Verify the Spark result.
DataSetManager<Table> dataSetManager = getDataset(AppWithPlugin.SPARK_TABLE);
Table table = dataSetManager.get();
try (Scanner scanner = table.scan(null, null)) {
for (int i = 0; i < 5; i++) {
Row row = scanner.next();
Assert.assertNotNull(row);
String expected = "Message " + i + " " + AppWithPlugin.TEST;
Assert.assertEquals(expected, Bytes.toString(row.getRow()));
Assert.assertEquals(expected, Bytes.toString(row.get(expected)));
}
// There shouldn't be any more rows in the table.
Assert.assertNull(scanner.next());
}
}
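The callServiceGet helper used above is defined elsewhere in TestFrameworkTestRun and is not part of this snippet. Purely as an illustration, a minimal version of such a helper could issue a plain HTTP GET against the service URL (a hypothetical sketch, not the project's actual implementation):
// Hypothetical helper: GET <serviceURL>/<path> and return the first line of the response body.
// Requires java.net.HttpURLConnection, java.net.URL, java.io.BufferedReader,
// java.io.InputStreamReader and java.nio.charset.StandardCharsets.
private String callServiceGet(URL serviceURL, String path) throws Exception {
  HttpURLConnection connection = (HttpURLConnection) new URL(serviceURL, path).openConnection();
  try (BufferedReader reader = new BufferedReader(
    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
    return reader.readLine();
  } finally {
    connection.disconnect();
  }
}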
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From the class MessagingAppTestRun, method testSparkMessaging:
@Test
public void testSparkMessaging() throws Exception {
ApplicationManager appManager = deployWithArtifact(NAMESPACE, MessagingApp.class, artifactJar);
final SparkManager sparkManager = appManager.getSparkManager(MessagingSpark.class.getSimpleName()).start();
final MessageFetcher fetcher = getMessagingContext().getMessageFetcher();
final AtomicReference<String> messageId = new AtomicReference<>();
// Wait for the Spark program to create the topic
final MessagingAdmin messagingAdmin = getMessagingAdmin(NAMESPACE.getNamespace());
Tasks.waitFor(true, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
try {
messagingAdmin.getTopicProperties(MessagingApp.TOPIC);
return true;
} catch (TopicNotFoundException e) {
return false;
}
}
}, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
// This verifies that a failed transaction does not publish anything.
for (String expected : Arrays.asList("start", "block")) {
Tasks.waitFor(expected, new Callable<String>() {
@Override
public String call() throws Exception {
try (CloseableIterator<Message> iterator = fetcher.fetch(NAMESPACE.getNamespace(), MessagingApp.TOPIC, 1, messageId.get())) {
if (!iterator.hasNext()) {
return null;
}
Message message = iterator.next();
messageId.set(message.getId());
return message.getPayloadAsString();
}
}
}, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
// Publish a control message to unblock the Spark execution
getMessagingContext().getMessagePublisher().publish(NAMESPACE.getNamespace(), MessagingApp.CONTROL_TOPIC, "go");
// Expect a result message of "result-15", where 15 is the sum of 1, 2, 3, 4, and 5.
Tasks.waitFor("result-15", new Callable<String>() {
@Override
public String call() throws Exception {
try (CloseableIterator<Message> iterator = fetcher.fetch(NAMESPACE.getNamespace(), MessagingApp.TOPIC, 1, messageId.get())) {
if (!iterator.hasNext()) {
return null;
}
Message message = iterator.next();
messageId.set(message.getId());
return message.getPayloadAsString();
}
}
}, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
}
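The two Tasks.waitFor blocks above repeat the same fetch-and-advance logic against the topic. A small helper along the following lines would remove that duplication (a refactoring sketch; the name fetchNextPayload is hypothetical and not part of the original test):
// Fetch at most one message published after 'lastMessageId', remember its id, and return its payload.
private String fetchNextPayload(MessageFetcher fetcher, String namespace, String topic,
                                AtomicReference<String> lastMessageId) throws Exception {
  try (CloseableIterator<Message> iterator = fetcher.fetch(namespace, topic, 1, lastMessageId.get())) {
    if (!iterator.hasNext()) {
      return null;
    }
    Message message = iterator.next();
    lastMessageId.set(message.getId());
    return message.getPayloadAsString();
  }
}
Each waitFor callable would then reduce to a single call such as fetchNextPayload(fetcher, NAMESPACE.getNamespace(), MessagingApp.TOPIC, messageId).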
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From the class TestFrameworkTestRun, method testClusterName:
@Test
public void testClusterName() throws Exception {
String clusterName = getConfiguration().get(Constants.CLUSTER_NAME);
ApplicationManager appManager = deployApplication(ClusterNameTestApp.class);
final DataSetManager<KeyValueTable> datasetManager = getDataset(ClusterNameTestApp.CLUSTER_NAME_TABLE);
final KeyValueTable clusterNameTable = datasetManager.get();
// A callable for reading the cluster name from the ClusterNameTable.
// It is used by the Tasks.waitFor calls below.
final AtomicReference<String> key = new AtomicReference<>();
Callable<String> readClusterName = new Callable<String>() {
@Nullable
@Override
public String call() throws Exception {
datasetManager.flush();
byte[] bytes = clusterNameTable.read(key.get());
return bytes == null ? null : new String(bytes, StandardCharsets.UTF_8);
}
};
// Service
ServiceManager serviceManager = appManager.getServiceManager(ClusterNameTestApp.ClusterNameServiceHandler.class.getSimpleName()).start();
Assert.assertEquals(clusterName, callServiceGet(serviceManager.getServiceURL(10, TimeUnit.SECONDS), "clusterName"));
serviceManager.stop();
// Worker
WorkerManager workerManager = appManager.getWorkerManager(ClusterNameTestApp.ClusterNameWorker.class.getSimpleName()).start();
key.set("worker.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
// The worker will stop by itself; there is no need to call stop().
workerManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.SECONDS);
// Flow
FlowManager flowManager = appManager.getFlowManager(ClusterNameTestApp.ClusterNameFlow.class.getSimpleName()).start();
key.set("flow.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
flowManager.stop();
// MapReduce
// Set up the input file used by the MapReduce program
Location location = this.<FileSet>getDataset(ClusterNameTestApp.INPUT_FILE_SET).get().getLocation("input");
try (PrintStream printer = new PrintStream(location.getOutputStream(), true, "UTF-8")) {
for (int i = 0; i < 10; i++) {
printer.println("Hello World " + i);
}
}
// Set up the input and output dataset arguments
Map<String, String> inputArgs = new HashMap<>();
FileSetArguments.setInputPath(inputArgs, "input");
Map<String, String> outputArgs = new HashMap<>();
FileSetArguments.setOutputPath(outputArgs, "output");
Map<String, String> args = new HashMap<>();
args.putAll(RuntimeArguments.addScope(Scope.DATASET, ClusterNameTestApp.INPUT_FILE_SET, inputArgs));
args.putAll(RuntimeArguments.addScope(Scope.DATASET, ClusterNameTestApp.OUTPUT_FILE_SET, outputArgs));
MapReduceManager mrManager = appManager.getMapReduceManager(ClusterNameTestApp.ClusterNameMapReduce.class.getSimpleName()).start(args);
key.set("mr.client.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set("mapper.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set("reducer.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
mrManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
// Spark
SparkManager sparkManager = appManager.getSparkManager(ClusterNameTestApp.ClusterNameSpark.class.getSimpleName()).start();
key.set("spark.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
// Workflow
// Clean up the output path for the MapReduce job in the workflow first
this.<FileSet>getDataset(ClusterNameTestApp.OUTPUT_FILE_SET).get().getLocation("output").delete(true);
args = RuntimeArguments.addScope(Scope.MAPREDUCE, ClusterNameTestApp.ClusterNameMapReduce.class.getSimpleName(), args);
WorkflowManager workflowManager = appManager.getWorkflowManager(ClusterNameTestApp.ClusterNameWorkflow.class.getSimpleName()).start(args);
String prefix = ClusterNameTestApp.ClusterNameWorkflow.class.getSimpleName() + ".";
key.set(prefix + "mr.client.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set(prefix + "mapper.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set(prefix + "reducer.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set(prefix + "spark.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
key.set(prefix + "action.cluster.name");
Tasks.waitFor(clusterName, readClusterName, 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
}
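On Java 8 and later, the readClusterName Callable above (and the other anonymous Callable classes in these examples) can be written more compactly as a lambda; an equivalent sketch:
// Lambda form of the readClusterName Callable used with Tasks.waitFor above.
Callable<String> readClusterName = () -> {
  datasetManager.flush();
  byte[] bytes = clusterNameTable.read(key.get());
  return bytes == null ? null : new String(bytes, StandardCharsets.UTF_8);
};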
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From the class DataStreamsSparkSinkTest, method testSparkSink:
private void testSparkSink(ApplicationManager appManager, final String output) throws Exception {
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start(ImmutableMap.of("tablename", output));
sparkManager.waitForStatus(true, 10, 1);
Tasks.waitFor(true, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return getDataset(output).get() != null;
}
}, 1, TimeUnit.MINUTES);
final DataSetManager<KeyValueTable> outputManager = getDataset(output);
final Map<String, String> expectedKeyValues = ImmutableMap.of("0", "samuel", "1", "jackson", "2", "dwayne", "3", "johnson");
Tasks.waitFor(true, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
outputManager.flush();
Map<String, String> keyValues = co.cask.cdap.etl.mock.spark.streaming.MockSink.getValues(expectedKeyValues.keySet(), outputManager);
return expectedKeyValues.equals(keyValues);
}
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStatus(false, 10, 1);
sparkManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
}
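The first Tasks.waitFor call above only waits for the output dataset to be created by the Spark sink; with a lambda the same check becomes a one-liner (an equivalent sketch of the anonymous Callable):
// Wait up to one minute for the Spark sink to create the output KeyValueTable.
Tasks.waitFor(true, () -> getDataset(output).get() != null, 1, TimeUnit.MINUTES);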
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From the class DataStreamsTest, method testAggregatorJoinerMacrosWithCheckpoints:
@Test
public void testAggregatorJoinerMacrosWithCheckpoints() throws Exception {
/*
             |--> aggregator --> sink1
    users1 --|
             |----|
                  |--> dupeFlagger --> sink2
    users2 -------|
*/
Schema userSchema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
List<StructuredRecord> users1 = ImmutableList.of(
  StructuredRecord.builder(userSchema).set("id", 1L).set("name", "Samuel").build(),
  StructuredRecord.builder(userSchema).set("id", 2L).set("name", "Dwayne").build(),
  StructuredRecord.builder(userSchema).set("id", 3L).set("name", "Terry").build());
List<StructuredRecord> users2 = ImmutableList.of(
  StructuredRecord.builder(userSchema).set("id", 1L).set("name", "Samuel").build(),
  StructuredRecord.builder(userSchema).set("id", 2L).set("name", "Dwayne").build(),
  StructuredRecord.builder(userSchema).set("id", 4L).set("name", "Terry").build(),
  StructuredRecord.builder(userSchema).set("id", 5L).set("name", "Christopher").build());
DataStreamsConfig pipelineConfig = DataStreamsConfig.builder()
  .setBatchInterval("5s")
  .addStage(new ETLStage("users1", MockSource.getPlugin(userSchema, users1)))
  .addStage(new ETLStage("users2", MockSource.getPlugin(userSchema, users2)))
  .addStage(new ETLStage("sink1", MockSink.getPlugin("sink1")))
  .addStage(new ETLStage("sink2", MockSink.getPlugin("sink2")))
  .addStage(new ETLStage("aggregator", FieldCountAggregator.getPlugin("${aggfield}", "${aggType}")))
  .addStage(new ETLStage("dupeFlagger", DupeFlagger.getPlugin("users1", "${flagField}")))
  .addConnection("users1", "aggregator")
  .addConnection("aggregator", "sink1")
  .addConnection("users1", "dupeFlagger")
  .addConnection("users2", "dupeFlagger")
  .addConnection("dupeFlagger", "sink2")
  .build();
AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ParallelAggApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
// run it once with this set of macros
Map<String, String> arguments = new HashMap<>();
arguments.put("aggfield", "id");
arguments.put("aggType", "long");
arguments.put("flagField", "isDupe");
SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
sparkManager.start(arguments);
sparkManager.waitForStatus(true, 10, 1);
final DataSetManager<Table> sink1 = getDataset("sink1");
final DataSetManager<Table> sink2 = getDataset("sink2");
Schema aggSchema = Schema.recordOf("user.count", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
final Set<StructuredRecord> expectedAggregates = ImmutableSet.of(
  StructuredRecord.builder(aggSchema).set("id", 0L).set("ct", 3L).build(),
  StructuredRecord.builder(aggSchema).set("id", 1L).set("ct", 1L).build(),
  StructuredRecord.builder(aggSchema).set("id", 2L).set("ct", 1L).build(),
  StructuredRecord.builder(aggSchema).set("id", 3L).set("ct", 1L).build());
Schema outputSchema = Schema.recordOf("user.flagged", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("isDupe", Schema.of(Schema.Type.BOOLEAN)));
final Set<StructuredRecord> expectedJoined = ImmutableSet.of(
  StructuredRecord.builder(outputSchema).set("id", 1L).set("name", "Samuel").set("isDupe", true).build(),
  StructuredRecord.builder(outputSchema).set("id", 2L).set("name", "Dwayne").set("isDupe", true).build(),
  StructuredRecord.builder(outputSchema).set("id", 3L).set("name", "Terry").set("isDupe", false).build());
Tasks.waitFor(true, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
sink1.flush();
sink2.flush();
Set<StructuredRecord> actualAggs = new HashSet<>();
Set<StructuredRecord> actualJoined = new HashSet<>();
actualAggs.addAll(MockSink.readOutput(sink1));
actualJoined.addAll(MockSink.readOutput(sink2));
return expectedAggregates.equals(actualAggs) && expectedJoined.equals(actualJoined);
}
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
sparkManager.waitForStatus(false, 30, 1);
MockSink.clear(sink1);
MockSink.clear(sink2);
// run it again with different macros to make sure they are re-evaluated and not stored in the checkpoint
arguments = new HashMap<>();
arguments.put("aggfield", "name");
arguments.put("aggType", "string");
arguments.put("flagField", "dupe");
sparkManager.start(arguments);
sparkManager.waitForStatus(true, 10, 1);
aggSchema = Schema.recordOf("user.count", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("ct", Schema.of(Schema.Type.LONG)));
final Set<StructuredRecord> expectedAggregates2 = ImmutableSet.of(
  StructuredRecord.builder(aggSchema).set("name", "all").set("ct", 3L).build(),
  StructuredRecord.builder(aggSchema).set("name", "Samuel").set("ct", 1L).build(),
  StructuredRecord.builder(aggSchema).set("name", "Dwayne").set("ct", 1L).build(),
  StructuredRecord.builder(aggSchema).set("name", "Terry").set("ct", 1L).build());
outputSchema = Schema.recordOf("user.flagged", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("dupe", Schema.of(Schema.Type.BOOLEAN)));
final Set<StructuredRecord> expectedJoined2 = ImmutableSet.of(
  StructuredRecord.builder(outputSchema).set("id", 1L).set("name", "Samuel").set("dupe", true).build(),
  StructuredRecord.builder(outputSchema).set("id", 2L).set("name", "Dwayne").set("dupe", true).build(),
  StructuredRecord.builder(outputSchema).set("id", 3L).set("name", "Terry").set("dupe", false).build());
Tasks.waitFor(true, new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
sink1.flush();
sink2.flush();
Set<StructuredRecord> actualAggs = new HashSet<>();
Set<StructuredRecord> actualJoined = new HashSet<>();
actualAggs.addAll(MockSink.readOutput(sink1));
actualJoined.addAll(MockSink.readOutput(sink2));
return expectedAggregates2.equals(actualAggs) && expectedJoined2.equals(actualJoined);
}
}, 1, TimeUnit.MINUTES);
sparkManager.stop();
}
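The ${aggfield}, ${aggType}, and ${flagField} macros in the pipeline configuration are supplied through the runtime arguments passed to each start(...) call, which is why the second run can switch the aggregation field and flag column without redeploying the pipeline. For illustration, the two argument maps could equally be built as immutable maps (a stylistic sketch, behaviorally identical to the HashMaps above):
// Runtime arguments for the two runs, expressed as immutable maps.
Map<String, String> firstRunArgs = ImmutableMap.of("aggfield", "id", "aggType", "long", "flagField", "isDupe");
Map<String, String> secondRunArgs = ImmutableMap.of("aggfield", "name", "aggType", "string", "flagField", "dupe");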