Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SparkTest, method testStreamSQL.
@Test
public void testStreamSQL() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  // Create a stream and set the format and schema
  StreamManager streamManager = getStreamManager("sqlStream");
  streamManager.createStream();
  Schema schema = Schema.recordOf("person",
                                  Schema.Field.of("firstName", Schema.of(Schema.Type.STRING)),
                                  Schema.Field.of("lastName", Schema.of(Schema.Type.STRING)),
                                  Schema.Field.of("age", Schema.of(Schema.Type.INT)));
  streamManager.setStreamProperties(new StreamProperties(86400L, new FormatSpecification("csv", schema), 1000));
  // Send some events, one millisecond apart, so that the timestamp filter can be tested
streamManager.send("Bob,Robert,15");
TimeUnit.MILLISECONDS.sleep(1);
streamManager.send("Eddy,Edison,35");
TimeUnit.MILLISECONDS.sleep(1);
streamManager.send("Thomas,Edison,60");
TimeUnit.MILLISECONDS.sleep(1);
streamManager.send("Tom,Thomson,50");
TimeUnit.MILLISECONDS.sleep(1);
streamManager.send("Roy,Thomson,8");
TimeUnit.MILLISECONDS.sleep(1);
streamManager.send("Jane,Jenny,6");
  // Run the Spark program under test
  SparkManager sparkManager = appManager.getSparkManager(StreamSQLSpark.class.getSimpleName())
    .start(Collections.singletonMap("input.stream", "sqlStream"));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
}
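The repeated send-then-sleep pairs above could be collapsed into a small helper. A minimal sketch, built only from the StreamManager.send and TimeUnit.sleep calls already used in the test; the helper name sendSpacedEvents is hypothetical:

// Hypothetical helper: sends CSV events and sleeps between them so that
// consecutive events get distinct stream timestamps.
private void sendSpacedEvents(StreamManager streamManager, long gapMillis, String... events) throws Exception {
  for (int i = 0; i < events.length; i++) {
    streamManager.send(events[i]);
    if (i < events.length - 1) {
      TimeUnit.MILLISECONDS.sleep(gapMillis);
    }
  }
}

Usage would then be, for example, sendSpacedEvents(streamManager, 1, "Bob,Robert,15", "Eddy,Edison,35", "Thomas,Edison,60").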
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SparkTest, method testSparkWithGetDataset.
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);
  DataSetManager<FileSet> filesetManager = getDataset("logs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareInputFileSetWithLogData(location);
  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
  args.put("input", "logs");
  args.put("output", "logStats");
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
  DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
  KeyValueTable logStatsTable = logStatsManager.get();
  validateGetDatasetOutput(logStatsTable);
  // Cleanup after run
  location.delete(true);
  logStatsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    while (scan.hasNext()) {
      logStatsTable.delete(scan.next().getKey());
    }
  }
  logStatsManager.flush();
}
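The helpers prepareInputFileSetWithLogData and validateGetDatasetOutput are not shown in this excerpt. A minimal sketch of what the input-preparation side might look like, assuming the Spark program consumes plain text log lines from the FileSet location; the log format and content here are illustrative assumptions, not the project's actual test data:

// Hypothetical helper: writes a few log-style lines to the FileSet location
// so the Spark program has input to aggregate. The log format is assumed.
private void prepareInputFileSetWithLogData(Location location) throws IOException {
  try (Writer writer = new OutputStreamWriter(location.getOutputStream(), StandardCharsets.UTF_8)) {
    writer.write("INFO start\n");
    writer.write("WARN slow request\n");
    writer.write("INFO done\n");
  }
}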
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SparkTest, method testStreamFormatSpec.
@Test
public void testStreamFormatSpec() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  StreamManager stream = getStreamManager("PeopleStream");
  stream.send("Old Man,50");
  stream.send("Baby,1");
  stream.send("Young Guy,18");
  stream.send("Small Kid,5");
  stream.send("Legal Drinker,21");
  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "output");
  Map<String, String> runtimeArgs = new HashMap<>();
  runtimeArgs.putAll(RuntimeArguments.addScope(Scope.DATASET, "PeopleFileSet", outputArgs));
  runtimeArgs.put("stream.name", "PeopleStream");
  runtimeArgs.put("output.dataset", "PeopleFileSet");
  runtimeArgs.put("sql.statement", "SELECT name, age FROM people WHERE age >= 21");
  List<String> programs = Arrays.asList(ScalaStreamFormatSpecSpark.class.getSimpleName(), StreamFormatSpecSpark.class.getSimpleName());
  for (String sparkProgramName : programs) {
    // Clean the output before starting
    DataSetManager<FileSet> fileSetManager = getDataset("PeopleFileSet");
    Location outputDir = fileSetManager.get().getLocation("output");
    outputDir.delete(true);
    SparkManager sparkManager = appManager.getSparkManager(sparkProgramName);
    sparkManager.start(runtimeArgs);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 180, TimeUnit.SECONDS);
    // Find the output part file. There is only one because the program repartitions to 1
    Location outputFile = Iterables.find(outputDir.list(), new Predicate<Location>() {
      @Override
      public boolean apply(Location input) {
        return input.getName().startsWith("part-r-");
      }
    });
    // Verify the result
    List<String> lines = CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputFile), Charsets.UTF_8));
    Map<String, Integer> result = new HashMap<>();
    for (String line : lines) {
      String[] parts = line.split(":");
      result.put(parts[0], Integer.parseInt(parts[1]));
    }
    Assert.assertEquals(ImmutableMap.of("Old Man", 50, "Legal Drinker", 21), result);
  }
}
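Because the same part-file lookup and "name:age" parsing run for both the Scala and the Java program, they could be factored into a helper. A sketch assembled only from the calls already shown above; the helper name readNameAgeOutput is hypothetical:

// Hypothetical helper: finds the single "part-r-" output file and parses its
// "name:age" lines into a map for assertion.
private Map<String, Integer> readNameAgeOutput(Location outputDir) throws IOException {
  Location outputFile = Iterables.find(outputDir.list(), new Predicate<Location>() {
    @Override
    public boolean apply(Location input) {
      return input.getName().startsWith("part-r-");
    }
  });
  Map<String, Integer> result = new HashMap<>();
  for (String line : CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputFile), Charsets.UTF_8))) {
    String[] parts = line.split(":");
    result.put(parts[0], Integer.parseInt(parts[1]));
  }
  return result;
}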
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SparkTest, method testSparkServicePlugin.
@Test
public void testSparkServicePlugin() throws Exception {
  addPluginArtifact(NamespaceId.DEFAULT.artifact("plugin", "1.0"), Collections.emptySet(), StringLengthFunc.class);
  // Write some lines to a file
  File file = TMP_FOLDER.newFile();
  try (PrintStream printer = new PrintStream(file)) {
    for (int i = 0; i < 1000; i++) {
      printer.printf("Message %d\n", i);
    }
  }
  ApplicationManager applicationManager = deploy(TestSparkApp.class);
  SparkManager sparkManager = applicationManager.getSparkManager(SparkServiceProgram.class.getSimpleName()).start();
  URL url = sparkManager.getServiceURL(5, TimeUnit.MINUTES);
  Assert.assertNotNull(url);
  URL pluginURL = url.toURI().resolve("plugin?pluginType=function&pluginName=len&file=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8")).toURL();
  HttpURLConnection urlConn = (HttpURLConnection) pluginURL.openConnection();
  Assert.assertEquals(200, urlConn.getResponseCode());
  try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
    Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() { }.getType());
    // The result should map each line in the file to the length of that line
    Assert.assertEquals(1000, result.size());
    Assert.assertTrue(Files.lines(file.toPath()).allMatch(line -> result.getOrDefault(line, -1) == line.length()));
  }
  // Deploy the UDT plugin and test the case of a plugin extending another plugin
  addPluginArtifact(NamespaceId.DEFAULT.artifact("pluggable", "1.0"), Collections.emptySet(), PluggableFunc.class);
  addPluginArtifact(NamespaceId.DEFAULT.artifact("lenudt", "1.0"), NamespaceId.DEFAULT.artifact("pluggable", "1.0"), StringLengthUDT.class);
  pluginURL = url.toURI().resolve("udtPlugin?udtName=len&file=" + URLEncoder.encode(file.getAbsolutePath(), "UTF-8")).toURL();
  urlConn = (HttpURLConnection) pluginURL.openConnection();
  Assert.assertEquals(200, urlConn.getResponseCode());
  try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
    Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() { }.getType());
    // The result should map each line in the file to the length of that line
    Assert.assertEquals(1000, result.size());
    Assert.assertTrue(Files.lines(file.toPath()).allMatch(line -> result.getOrDefault(line, -1) == line.length()));
  }
}
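The response check above is repeated verbatim for the function endpoint and the UDT endpoint, so it could be extracted into a helper. A sketch built only from the calls already used in this test, assuming the 1000-line input file created above; the helper name verifyLineLengths is hypothetical:

// Hypothetical helper: calls the given endpoint and asserts that the JSON
// response maps every line of the 1000-line input file to its length.
private void verifyLineLengths(URL pluginURL, File file) throws IOException {
  HttpURLConnection urlConn = (HttpURLConnection) pluginURL.openConnection();
  Assert.assertEquals(200, urlConn.getResponseCode());
  try (Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8")) {
    Map<String, Integer> result = new Gson().fromJson(reader, new TypeToken<Map<String, Integer>>() { }.getType());
    Assert.assertEquals(1000, result.size());
    Assert.assertTrue(Files.lines(file.toPath()).allMatch(line -> result.getOrDefault(line, -1) == line.length()));
  }
}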
Use of co.cask.cdap.test.SparkManager in project cdap by caskdata.
From class SparkStreamingTestRun, method test.
@Test
public void test() throws Exception {
  File checkpointDir = TEMP_FOLDER.newFolder();
  KafkaPublisher publisher = KAFKA_TESTER.getKafkaClient().getPublisher(KafkaPublisher.Ack.LEADER_RECEIVED, Compression.NONE);
  ApplicationManager appManager = deployApplication(TestSparkApp.class);
  Map<String, String> args = ImmutableMap.of(
    "checkpoint.path", checkpointDir.getAbsolutePath(),
    "kafka.brokers", KAFKA_TESTER.getBrokerService().getBrokerList(),
    "kafka.topics", "testtopic",
    "result.dataset", "TimeSeriesResult");
  SparkManager manager = appManager.getSparkManager(KafkaSparkStreaming.class.getSimpleName());
  manager.start(args);
  // Send 100 messages over 5 seconds
  for (int i = 0; i < 100; i++) {
    publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
    TimeUnit.MILLISECONDS.sleep(50);
  }
  // Sum up everything from the TimeseriesTable. "Message" should have a count of 100,
  // while each number (0-99) should have a count of 1
  final DataSetManager<TimeseriesTable> tsTableManager = getDataset("TimeSeriesResult");
  final TimeseriesTable tsTable = tsTableManager.get();
  Tasks.waitFor(100L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      tsTableManager.flush();
      return getCounts("Message", tsTable);
    }
  }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  for (int i = 0; i < 100; i++) {
    final int finalI = i;
    Tasks.waitFor(1L, new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        tsTableManager.flush();
        return getCounts(Integer.toString(finalI), tsTable);
      }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  }
  manager.stop();
  manager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
  // Send 100 more messages without pause
  for (int i = 100; i < 200; i++) {
    publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
  }
  // Start the streaming app again. It should resume from where it left off because of the checkpoint
  manager.start(args);
  // Expect "Message" to reach a count of 200
  Tasks.waitFor(200L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      tsTableManager.flush();
      return getCounts("Message", tsTable);
    }
  }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  // Expect each number (0-199) to have a count of 1
  for (int i = 0; i < 200; i++) {
    final int finalI = i;
    Tasks.waitFor(1L, new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        tsTableManager.flush();
        return getCounts(Integer.toString(finalI), tsTable);
      }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  }
  manager.stop();
  manager.waitForRuns(ProgramRunStatus.KILLED, 2, 60, TimeUnit.SECONDS);
}
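The getCounts helper is not shown in this excerpt. A minimal sketch of what it might look like, assuming the streaming program writes one long-encoded count per batch keyed by the word; the read range and value encoding are assumptions:

// Hypothetical helper: sums every count recorded for the given word across the
// whole time range of the TimeseriesTable.
private long getCounts(String word, TimeseriesTable tsTable) {
  long total = 0;
  Iterator<TimeseriesTable.Entry> entries = tsTable.read(Bytes.toBytes(word), 0, Long.MAX_VALUE);
  while (entries.hasNext()) {
    total += Bytes.toLong(entries.next().getValue());
  }
  return total;
}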