Use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
From the class SparkTestRun, method validateGetDatasetOutput:
private void validateGetDatasetOutput(KeyValueTable logStatsTable) {
  SparkAppUsingGetDataset.LogKey fredKey1 =
    new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey fredKey2 =
    new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogKey bradKey1 =
    new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey bradKey2 =
    new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogStats fredStats1 = new SparkAppUsingGetDataset.LogStats(2, 100);
  SparkAppUsingGetDataset.LogStats fredStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats1 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  Map<SparkAppUsingGetDataset.LogKey, SparkAppUsingGetDataset.LogStats> expected =
    ImmutableMap.of(fredKey1, fredStats1, fredKey2, fredStats2, bradKey1, bradStats1, bradKey2, bradStats2);

  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    // must have 4 records
    for (int i = 0; i < 4; i++) {
      Assert.assertTrue("Expected next for i = " + i, scan.hasNext());
      KeyValue<byte[], byte[]> next = scan.next();
      SparkAppUsingGetDataset.LogKey logKey =
        new Gson().fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
      SparkAppUsingGetDataset.LogStats logStats =
        new Gson().fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
      Assert.assertEquals(expected.get(logKey), logStats);
    }
    // no more records
    Assert.assertFalse(scan.hasNext());
  }
}
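For the lookup expected.get(logKey) and the Assert.assertEquals on LogStats to pass, both nested classes must implement value-based equals() and hashCode() over their fields. The SparkAppUsingGetDataset source is not shown here, so the following is only a minimal sketch of what LogKey might look like; the field names (ip, user, request, responseCode) are assumptions made for illustration.

// Hypothetical sketch of SparkAppUsingGetDataset.LogKey; field names are assumed.
// Requires java.util.Objects.
public static final class LogKey {
  private final String ip;
  private final String user;
  private final String request;
  private final int responseCode;

  public LogKey(String ip, String user, String request, int responseCode) {
    this.ip = ip;
    this.user = user;
    this.request = request;
    this.responseCode = responseCode;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof LogKey)) {
      return false;
    }
    LogKey other = (LogKey) o;
    return responseCode == other.responseCode
      && Objects.equals(ip, other.ip)
      && Objects.equals(user, other.user)
      && Objects.equals(request, other.request);
  }

  @Override
  public int hashCode() {
    return Objects.hash(ip, user, request, responseCode);
  }
}

A LogStats sketch would follow the same pattern; Gson populates both classes via reflection, so the equals()/hashCode() contract is the only part this assertion actually depends on.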
Use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
From the class SparkTestRun, method testSparkWithGetDataset:
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);

  DataSetManager<FileSet> filesetManager = getDataset("logs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareInputFileSetWithLogData(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
  args.put("input", "logs");
  args.put("output", "logStats");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
  KeyValueTable logStatsTable = logStatsManager.get();
  validateGetDatasetOutput(logStatsTable);

  // Cleanup after run
  location.delete(true);
  logStatsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    while (scan.hasNext()) {
      logStatsTable.delete(scan.next().getKey());
    }
  }
  logStatsManager.flush();
}
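The arguments above combine a dataset-scoped argument (the input path for the "logs" FileSet) with plain program arguments. As a usage note, the scoped arguments can be unpacked again on the consuming side; the following is a small sketch under the assumption that the standard CDAP helpers RuntimeArguments.extractScope and FileSetArguments.getInputPath are available alongside the addScope/setInputPath calls used above.

// Sketch: compose dataset-scoped arguments, then extract them again.
Map<String, String> inputArgs = new HashMap<>();
FileSetArguments.setInputPath(inputArgs, "nn");

// Scope the FileSet arguments to the "logs" dataset, as in the test above
Map<String, String> args = new HashMap<>(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));

// On the consuming side, pull out only the arguments scoped to "logs"
Map<String, String> logsArgs = RuntimeArguments.extractScope(Scope.DATASET, "logs", args);
String inputPath = FileSetArguments.getInputPath(logsArgs);  // expected to be "nn"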
Use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
From the class SparkTest, method validateGetDatasetOutput:
private void validateGetDatasetOutput(KeyValueTable logStatsTable) {
  SparkAppUsingGetDataset.LogKey fredKey1 =
    new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey fredKey2 =
    new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogKey bradKey1 =
    new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 200);
  SparkAppUsingGetDataset.LogKey bradKey2 =
    new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 404);
  SparkAppUsingGetDataset.LogStats fredStats1 = new SparkAppUsingGetDataset.LogStats(2, 100);
  SparkAppUsingGetDataset.LogStats fredStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats1 = new SparkAppUsingGetDataset.LogStats(1, 50);
  SparkAppUsingGetDataset.LogStats bradStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
  Map<SparkAppUsingGetDataset.LogKey, SparkAppUsingGetDataset.LogStats> expected =
    ImmutableMap.of(fredKey1, fredStats1, fredKey2, fredStats2, bradKey1, bradStats1, bradKey2, bradStats2);

  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    // must have 4 records
    for (int i = 0; i < 4; i++) {
      Assert.assertTrue("Expected next for i = " + i, scan.hasNext());
      KeyValue<byte[], byte[]> next = scan.next();
      SparkAppUsingGetDataset.LogKey logKey =
        new Gson().fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
      SparkAppUsingGetDataset.LogStats logStats =
        new Gson().fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
      Assert.assertEquals(expected.get(logKey), logStats);
    }
    // no more records
    Assert.assertFalse(scan.hasNext());
  }
}
Use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
From the class FileUploadServiceTestRun, method testFileUploadService:
@Test
public void testFileUploadService() throws Exception {
  ApplicationManager appManager = deployApplication(FileUploadApp.class);

  // Start the service
  ServiceManager serviceManager = appManager.getServiceManager(FileUploadApp.SERVICE_NAME).start();
  serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  try {
    // The upload URL is "base/upload/pfs/[partition_value]", where the partition value is a long
    URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();

    // Upload with a wrong MD5; should get 400
    byte[] content = Strings.repeat("0123456789 ", 100).getBytes(Charsets.UTF_8);
    Assert.assertEquals(HttpURLConnection.HTTP_BAD_REQUEST,
                        upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, "123", 30));

    long beforeUploadTime = System.currentTimeMillis();

    // Upload with the right MD5; should get 200
    Assert.assertEquals(HttpURLConnection.HTTP_OK,
                        upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content,
                               BaseEncoding.base64().encode(Hashing.md5().hashBytes(content).asBytes()), 20));

    // Inspect the partitioned file set and verify the content
    PartitionedFileSet pfs = (PartitionedFileSet) getDataset(FileUploadApp.PFS_NAME).get();
    PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
    Assert.assertNotNull(partition);

    // Verify that a notification was published for the new partition
    List<Notification> notifications = getDataNotifications(beforeUploadTime);
    // Should have one message
    Assert.assertEquals(1, notifications.size());
    verifyDataNotification(notifications.get(0), NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME),
                           Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));

    // There should be one file under the partition directory
    List<Location> locations = partition.getLocation().list();
    Assert.assertEquals(1, locations.size());
    Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));

    // Verify the tracking table of chunk sizes:
    // sum up all chunk sizes tracked by the table
    KeyValueTable trackingTable = (KeyValueTable) getDataset(FileUploadApp.KV_TABLE_NAME).get();
    long sum = 0;
    int iterSize = 0;
    try (CloseableIterator<KeyValue<byte[], byte[]>> iter = trackingTable.scan(null, null)) {
      while (iter.hasNext()) {
        KeyValue<byte[], byte[]> kv = iter.next();
        sum += Bytes.toInt(kv.getKey()) * Bytes.toLong(kv.getValue());
        iterSize++;
      }
    }
    // The iterator should have size >= 2, since the two uploads use different chunk sizes
    Assert.assertTrue(iterSize >= 2);
    // The sum of all chunk sizes should equal the content size * 2
    // (one failed upload and one successful upload)
    Assert.assertEquals(content.length * 2, sum);
  } finally {
    serviceManager.stop();
    serviceManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
  }
}
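The upload(...) helper invoked above is not part of this snippet. A rough sketch of what such a helper might look like, assuming it streams the content in fixed-size chunks and sends the base64 MD5 digest in a Content-MD5 header; the method body and parameter meanings are inferred from the call sites, not taken from the actual FileUploadServiceTestRun source.

// Hypothetical helper: POSTs "content" to "url" in chunks of "chunkSize" bytes,
// sending the given base64-encoded MD5 digest in a Content-MD5 header,
// and returns the HTTP response code.
private int upload(URL url, byte[] content, String base64Md5, int chunkSize) throws IOException {
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    conn.setDoOutput(true);
    conn.setRequestMethod("POST");
    conn.setRequestProperty("Content-MD5", base64Md5);
    conn.setChunkedStreamingMode(chunkSize);
    try (OutputStream os = conn.getOutputStream()) {
      for (int off = 0; off < content.length; off += chunkSize) {
        os.write(content, off, Math.min(chunkSize, content.length - off));
      }
    }
    return conn.getResponseCode();
  } finally {
    conn.disconnect();
  }
}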
Use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.
From the class SparkTestRun, method testTransaction:
@Test
public void testTransaction() throws Exception {
  ApplicationManager applicationManager = deploy(TestSparkApp.class);
  StreamManager streamManager = getStreamManager("SparkStream");

  // Write some sentences to the stream
  streamManager.send("red fox");
  streamManager.send("brown fox");
  streamManager.send("grey fox");
  streamManager.send("brown bear");
  streamManager.send("black bear");

  // Run the Spark program
  SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of(
    "source.stream", "SparkStream",
    "keyvalue.table", "KeyValueTable",
    "result.all.dataset", "SparkResult",
    "result.threshold", "2",
    "result.threshold.dataset", "SparkThresholdResult"
  ));

  // Verify the result dataset before the Spark program terminates
  final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
  final KeyValueTable resultTable = resultManager.get();

  // Expect the threshold result dataset (threshold >= 2) to contain [brown, fox, bear]
  Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {
    @Override
    public Set<String> call() throws Exception {
      // Flush to start a new transaction
      resultManager.flush();
      LOG.info("Reading from threshold result");
      try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
        return ImmutableSet.copyOf(Iterators.transform(itor, new Function<KeyValue<byte[], byte[]>, String>() {
          @Override
          public String apply(KeyValue<byte[], byte[]> input) {
            String word = Bytes.toString(input.getKey());
            LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
            return word;
          }
        }));
      }
    }
  }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);

  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
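Once the expected word set has appeared, the same table could also be checked for individual counts. A small sketch, assuming the program stores each word's count as a 4-byte int value (consistent with the Bytes.toInt call above); the expected numbers follow from the five sentences sent to the stream (fox 3, brown 2, bear 2).

// Assumes counts are stored as 4-byte ints keyed by word
resultManager.flush();  // start a fresh transaction before reading
Assert.assertEquals(3, Bytes.toInt(resultTable.read(Bytes.toBytes("fox"))));
Assert.assertEquals(2, Bytes.toInt(resultTable.read(Bytes.toBytes("brown"))));
Assert.assertEquals(2, Bytes.toInt(resultTable.read(Bytes.toBytes("bear"))));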