Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class MessagingAppTestRun, method testSparkMessaging.
@Test
public void testSparkMessaging() throws Exception {
  ApplicationManager appManager = deployWithArtifact(NAMESPACE, MessagingApp.class, artifactJar);
  final SparkManager sparkManager = appManager.getSparkManager(MessagingSpark.class.getSimpleName()).start();
  final MessageFetcher fetcher = getMessagingContext().getMessageFetcher();
  final AtomicReference<String> messageId = new AtomicReference<>();

  // Wait for the Spark program to create the topic
  final MessagingAdmin messagingAdmin = getMessagingAdmin(NAMESPACE.getNamespace());
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      try {
        messagingAdmin.getTopicProperties(MessagingApp.TOPIC);
        return true;
      } catch (TopicNotFoundException e) {
        return false;
      }
    }
  }, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);

  // Expect only "start" then "block"; this verifies that a failed transaction does not publish anything.
  for (String expected : Arrays.asList("start", "block")) {
    Tasks.waitFor(expected, new Callable<String>() {
      @Override
      public String call() throws Exception {
        try (CloseableIterator<Message> iterator =
               fetcher.fetch(NAMESPACE.getNamespace(), MessagingApp.TOPIC, 1, messageId.get())) {
          if (!iterator.hasNext()) {
            return null;
          }
          Message message = iterator.next();
          messageId.set(message.getId());
          return message.getPayloadAsString();
        }
      }
    }, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
  }

  // Publish a control message to unblock the Spark execution
  getMessagingContext().getMessagePublisher().publish(NAMESPACE.getNamespace(), MessagingApp.CONTROL_TOPIC, "go");

  // Expect a result message "result-15", where 15 is the sum of 1, 2, 3, 4, 5
  Tasks.waitFor("result-15", new Callable<String>() {
    @Override
    public String call() throws Exception {
      try (CloseableIterator<Message> iterator =
             fetcher.fetch(NAMESPACE.getNamespace(), MessagingApp.TOPIC, 1, messageId.get())) {
        if (!iterator.hasNext()) {
          return null;
        }
        Message message = iterator.next();
        messageId.set(message.getId());
        return message.getPayloadAsString();
      }
    }
  }, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);

  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
}
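On Java 8 and later, the two identical fetch-and-compare Callable blocks above can be folded into a single helper, since Callable is a functional interface. A minimal sketch, assuming the same NAMESPACE and MessagingApp.TOPIC constants that are in scope in the test (the helper name waitForMessage is hypothetical):

private void waitForMessage(MessageFetcher fetcher, AtomicReference<String> messageId,
                            String expected) throws Exception {
  Tasks.waitFor(expected, () -> {
    try (CloseableIterator<Message> iterator =
           fetcher.fetch(NAMESPACE.getNamespace(), MessagingApp.TOPIC, 1, messageId.get())) {
      if (!iterator.hasNext()) {
        return null; // nothing published yet; Tasks.waitFor polls again
      }
      Message message = iterator.next();
      messageId.set(message.getId()); // advance the cursor past the consumed message
      return message.getPayloadAsString();
    }
  }, 60, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}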
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class SparkFileSetTestRun, method testSparkWithPartitionedFileSet.
private void testSparkWithPartitionedFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
  DataSetManager<PartitionedFileSet> pfsManager = getDataset("pfs");
  PartitionedFileSet pfs = pfsManager.get();

  PartitionOutput partitionOutput = pfs.getPartitionOutput(PartitionKey.builder().addStringField("x", "nn").build());
  Location location = partitionOutput.getLocation();
  prepareFileInput(location);
  partitionOutput.addPartition();
  pfsManager.flush();

  Map<String, String> inputArgs = new HashMap<>();
  PartitionedFileSetArguments.setInputPartitionFilter(
    inputArgs, PartitionFilter.builder().addRangeCondition("x", "na", "nx").build());
  Map<String, String> outputArgs = new HashMap<>();
  PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build();
  PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);

  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", outputArgs));
  args.put("input", "pfs");
  args.put("output", "pfs");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);
  pfsManager.flush();

  PartitionDetail partition = pfs.getPartition(outputKey);
  Assert.assertNotNull(partition);
  validateFileOutput(partition.getLocation());

  // Clean up after the test completes
  pfs.dropPartition(partitionOutput.getPartitionKey());
  pfs.dropPartition(partition.getPartitionKey());
  pfsManager.flush();
}
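The two RuntimeArguments.addScope calls are what confine inputArgs and outputArgs to the dataset named "pfs". A small illustration of the key prefixing, assuming CDAP's documented <scope>.<name>.<key> convention (the key and value here are made up for the example):

Map<String, String> raw = new HashMap<>();
raw.put("some.key", "some-value"); // hypothetical argument
Map<String, String> scoped = RuntimeArguments.addScope(Scope.DATASET, "pfs", raw);
// scoped now maps "dataset.pfs.some.key" -> "some-value", so at runtime
// only the dataset named "pfs" sees the argument.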
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class SparkFileSetTestRun, method testSparkWithTimePartitionedFileSet.
private void testSparkWithTimePartitionedFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
  long customOutputPartitionKey = 123456789L;
  long customInputPartitionKey = 987654321L;

  DataSetManager<TimePartitionedFileSet> tpfsManager = getDataset("tpfs");
  long inputTime = System.currentTimeMillis();
  long outputTime = inputTime + TimeUnit.HOURS.toMillis(1);
  addTimePartition(tpfsManager, inputTime);
  addTimePartition(tpfsManager, customInputPartitionKey);

  Map<String, String> inputArgs = new HashMap<>();
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, inputTime - 100);
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, inputTime + 100);
  Map<String, String> outputArgs = new HashMap<>();
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, outputTime);

  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "tpfs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "tpfs", outputArgs));
  args.put("input", "tpfs");
  args.put("output", "tpfs");
  args.put("outputKey", String.valueOf(customOutputPartitionKey));
  args.put("inputKey", String.valueOf(customInputPartitionKey));

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.MINUTES);
  tpfsManager.flush();

  TimePartitionedFileSet tpfs = tpfsManager.get();
  PartitionDetail partition = tpfs.getPartitionByTime(outputTime);
  Assert.assertNotNull("Output partition is null when running without custom dataset arguments", partition);
  validateFileOutput(partition.getLocation());

  PartitionDetail customPartition = tpfs.getPartitionByTime(customOutputPartitionKey);
  Assert.assertNotNull("Output partition is null when running with custom dataset arguments", customPartition);
  validateFileOutput(customPartition.getLocation());

  // Clean up after the test completes
  tpfs.dropPartition(inputTime);
  tpfs.dropPartition(customInputPartitionKey);
  tpfs.dropPartition(partition.getPartitionKey());
  tpfs.dropPartition(customPartition.getPartitionKey());
  tpfsManager.flush();
}
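The addTimePartition helper is private to SparkFileSetTestRun and not shown in this snippet. A plausible reconstruction, assuming it mirrors the PartitionedFileSet setup in the previous snippet and reuses the same prepareFileInput helper (the actual implementation may differ):

// Hypothetical reconstruction of the helper used above.
private void addTimePartition(DataSetManager<TimePartitionedFileSet> tpfsManager, long time) throws Exception {
  TimePartitionedFileSet tpfs = tpfsManager.get();
  TimePartitionOutput output = tpfs.getPartitionOutput(time); // partition keyed by the timestamp
  prepareFileInput(output.getLocation());                     // write the test input files
  output.addPartition();                                      // register the partition metadata
  tpfsManager.flush();                                        // commit the enclosing transaction
}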
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class SparkTest, method testClassicSpark.
@Test
public void testClassicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  for (Class<?> sparkClass : Arrays.asList(TestSparkApp.ClassicSpark.class, TestSparkApp.ScalaClassicSpark.class)) {
    final SparkManager sparkManager = appManager.getSparkManager(sparkClass.getSimpleName());
    sparkManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  }

  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ClassicSparkProgram.class.getName())));
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ScalaClassicSparkProgram.class.getName())));
}
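startAndWaitForGoodRun both starts the program and blocks until it reaches the expected terminal status. Roughly, and only roughly, it behaves like the start/waitForRun combination used in the other snippets on this page:

// Approximate expansion of startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// the real method additionally fails fast if the run ends in an unexpected state.
sparkManager.start();
sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);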
Use of io.cdap.cdap.test.SparkManager in project cdap by caskdata.
In class SparkTest, method testTransaction.
@Test
public void testTransaction() throws Exception {
  ApplicationManager applicationManager = deploy(TestSparkApp.class);

  // Write some data to a local file
  File inputFile = TEMP_FOLDER.newFile();
  try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(inputFile.toPath(), StandardCharsets.UTF_8))) {
    writer.println("red fox");
    writer.println("brown fox");
    writer.println("grey fox");
    writer.println("brown bear");
    writer.println("black bear");
  }

  // Run the Spark program
  SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of(
    "input.file", inputFile.getAbsolutePath(),
    "keyvalue.table", "KeyValueTable",
    "result.all.dataset", "SparkResult",
    "result.threshold", "2",
    "result.threshold.dataset", "SparkThresholdResult"));

  // Verify the dataset contents before the Spark program terminates
  final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
  final KeyValueTable resultTable = resultManager.get();

  // Expect the threshold result dataset (threshold >= 2) to contain [brown, fox, bear]
  Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), () -> {
    // Flush to start a new transaction, so the latest committed writes are visible
    resultManager.flush();
    LOG.info("Reading from threshold result");
    try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
      return ImmutableSet.copyOf(Iterators.transform(itor, input -> {
        String word = Bytes.toString(input.getKey());
        LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
        return word;
      }));
    }
  }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);

  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
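The Tasks.waitFor block above only checks the keys. Given the five input lines, the expected counts are brown=2, fox=3, bear=2, so the values could be asserted as well. A hedged sketch, assuming the values are int counts as the Bytes.toInt call in the log statement implies:

// Sketch: assert the per-word counts once the expected keys have appeared.
resultManager.flush(); // start a fresh transaction to see the latest committed writes
Assert.assertEquals(2, Bytes.toInt(resultTable.read("brown")));
Assert.assertEquals(3, Bytes.toInt(resultTable.read("fox")));
Assert.assertEquals(2, Bytes.toInt(resultTable.read("bear")));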