Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class Spark2Test, method checkOutputData.
private void checkOutputData(DataSetManager<KeyValueTable> manager) {
  KeyValueTable count = manager.get();
  // Read the output and verify the result
  byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
  Assert.assertNotNull(val);
  Assert.assertEquals(TEST_STRING_1.length(), Bytes.toInt(val));
  val = count.read(Bytes.toBytes(TEST_STRING_2));
  Assert.assertNotNull(val);
  Assert.assertEquals(TEST_STRING_2.length(), Bytes.toInt(val));
}
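For context, the sketch below shows one way such counts could be written into the KeyValueTable before verification. The helper name, the varargs signature, and the "length stored under the string key" convention are assumptions for illustration; they are not taken from Spark2Test.

// Hypothetical helper: stores the length of each input string under the string itself as the key.
private void writeExpectedCounts(DataSetManager<KeyValueTable> manager, String... inputs) {
  KeyValueTable count = manager.get();
  for (String input : inputs) {
    count.write(Bytes.toBytes(input), Bytes.toBytes(input.length()));
  }
  // Persist the writes so that a later get()/read() sees them
  manager.flush();
}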
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class SparkLogParser, method run.
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
  JavaSparkContext jsc = new JavaSparkContext();
  Map<String, String> runtimeArguments = sec.getRuntimeArguments();
  String inputFileSet = runtimeArguments.get("input");
  final String outputTable = runtimeArguments.get("output");
  JavaPairRDD<LongWritable, Text> input = sec.fromDataset(inputFileSet);
  final JavaPairRDD<String, String> aggregated = input
    .mapToPair(new PairFunction<Tuple2<LongWritable, Text>, LogKey, LogStats>() {
      @Override
      public Tuple2<LogKey, LogStats> call(Tuple2<LongWritable, Text> input) throws Exception {
        return SparkAppUsingGetDataset.parse(input._2());
      }
    })
    .reduceByKey(new Function2<LogStats, LogStats, LogStats>() {
      @Override
      public LogStats call(LogStats stats1, LogStats stats2) throws Exception {
        return stats1.aggregate(stats2);
      }
    })
    .mapPartitionsToPair(new PairFlatMapFunction<Iterator<Tuple2<LogKey, LogStats>>, String, String>() {
      @Override
      public Iterator<Tuple2<String, String>> call(Iterator<Tuple2<LogKey, LogStats>> itor) throws Exception {
        final Gson gson = new Gson();
        return Lists.newArrayList(
          Iterators.transform(itor, new Function<Tuple2<LogKey, LogStats>, Tuple2<String, String>>() {
            @Override
            public Tuple2<String, String> apply(Tuple2<LogKey, LogStats> input) {
              return new Tuple2<>(gson.toJson(input._1()), gson.toJson(input._2()));
            }
          })).iterator();
      }
    });
  // Collect all data to the driver and write to the dataset directly. That is the intent of the test.
  sec.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      KeyValueTable kvTable = context.getDataset(outputTable);
      for (Map.Entry<String, String> entry : aggregated.collectAsMap().entrySet()) {
        kvTable.write(entry.getKey(), entry.getValue());
      }
    }
  });
}
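Because Spark's Java function interfaces are functional interfaces, the same transformation chain can be written more compactly with Java 8 lambdas. The sketch below is equivalent logic under the assumption of a Java 8+ build and the Spark 2.x APIs used elsewhere in these tests; it reuses the input RDD from the snippet above and replaces the Guava Iterators/Lists helpers with a plain java.util.ArrayList.

// Sketch only: the same aggregation expressed with lambdas instead of anonymous inner classes.
JavaPairRDD<String, String> aggregatedLambda = input
  .mapToPair(record -> SparkAppUsingGetDataset.parse(record._2()))
  .reduceByKey((stats1, stats2) -> stats1.aggregate(stats2))
  .mapPartitionsToPair(itor -> {
    Gson gson = new Gson();
    List<Tuple2<String, String>> result = new ArrayList<>();
    while (itor.hasNext()) {
      Tuple2<LogKey, LogStats> entry = itor.next();
      result.add(new Tuple2<>(gson.toJson(entry._1()), gson.toJson(entry._2())));
    }
    return result.iterator();
  });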
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class SparkServiceIntegrationTestRun, method testSparkWithService.
@Test
public void testSparkWithService() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
  startService(applicationManager);
  SparkManager sparkManager = applicationManager
    .getSparkManager(TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  KeyValueTable results = datasetManager.get();
  // The Spark program is expected to have written the square of i, as a string, under the key i
  for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertEquals(i * i, Integer.parseInt(Bytes.toString(results.read(key))));
  }
}
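For reference, the sketch below shows a Spark program body that would produce the data this test verifies, writing the squares of 1 through 5 as string values into the "result" KeyValueTable inside a transaction. Only the dataset name and the key/value encoding come from the test above; the rest illustrates the sec.execute/TxRunnable write pattern and is not the actual TestSparkServiceIntegrationApp code.

// Hypothetical Spark program body that writes i -> i * i into the "result" KeyValueTable.
public void run(JavaSparkExecutionContext sec) throws Exception {
  sec.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      KeyValueTable result = context.getDataset("result");
      for (int i = 1; i <= 5; i++) {
        // Store the square as a UTF-8 string so that Bytes.toString/Integer.parseInt can read it back
        result.write(String.valueOf(i).getBytes(Charsets.UTF_8), Bytes.toBytes(String.valueOf(i * i)));
      }
    }
  });
}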
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class ServiceLifeCycleTestRun, method testContentConsumerProducerLifecycle.
@Test
public void testContentConsumerProducerLifecycle() throws Exception {
  try {
    ApplicationManager appManager = deployWithArtifact(ServiceLifecycleApp.class, artifactJar);
    // Use a single handler thread so that context capture and release can be tested
    serviceManager = appManager.getServiceManager("test").start(ImmutableMap.of(SystemArguments.SERVICE_THREADS, "1"));
    final DataSetManager<KeyValueTable> datasetManager = getDataset(ServiceLifecycleApp.HANDLER_TABLE_NAME);
    // Clean up the dataset first to avoid being affected by other tests
    datasetManager.get().delete(Bytes.toBytes("called"));
    datasetManager.get().delete(Bytes.toBytes("completed"));
    datasetManager.flush();
    CountDownLatch uploadLatch = new CountDownLatch(1);
    // Start five concurrent uploads
    List<ListenableFuture<Integer>> completions = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
      completions.add(slowUpload(serviceManager, "POST", "uploadDownload", uploadLatch));
    }
    // Get the states; there should be six handler instances initialized:
    // five for the in-progress uploads and one for the getStates call
    Tasks.waitFor(6, () -> getStates(serviceManager).size(), 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    // Complete the uploads
    uploadLatch.countDown();
    // Make sure the download through the ContentProducer has started
    Tasks.waitFor(true, () -> {
      byte[] value = datasetManager.get().read("called");
      datasetManager.flush();
      return value != null && value.length == Bytes.SIZEOF_LONG && Bytes.toLong(value) > 5;
    }, 10L, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    // Get the states; there should still be six handler instances, since the ContentConsumer should
    // pass its captured context to the ContentProducer without creating a new one
    Multimap<Integer, String> states = getStates(serviceManager);
    Assert.assertEquals(6, states.size());
    // Set the completion flag in the dataset
    datasetManager.get().write("completed", Bytes.toBytes(true));
    datasetManager.flush();
    // Wait for completion
    Futures.successfulAsList(completions).get(10, TimeUnit.SECONDS);
    // Verify the upload results
    for (ListenableFuture<Integer> future : completions) {
      Assert.assertEquals(200, future.get().intValue());
    }
    // Get the states again; they should still be the same six instances
    Assert.assertEquals(states, getStates(serviceManager));
  } finally {
    serviceManager.stop();
    serviceManager.waitForStopped(10, TimeUnit.SECONDS);
  }
}
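The slowUpload helper is not shown in this snippet. Judging from how it is called (a service manager, an HTTP method, an endpoint, and a latch), a minimal sketch could look like the one below. The chunked streaming, the payload bytes, and the Guava ListeningExecutorService wiring are assumptions for illustration, not the actual ServiceLifeCycleTestRun code, and executor shutdown is omitted for brevity.

// Hypothetical helper: starts an upload on a background thread, keeps the request body open
// until the latch is released, then finishes the request and returns the HTTP response code.
private ListenableFuture<Integer> slowUploadSketch(ServiceManager serviceManager, String method,
                                                   String endpoint, CountDownLatch latch) throws Exception {
  URL url = new URL(serviceManager.getServiceURL(10, TimeUnit.SECONDS), endpoint);
  ListeningExecutorService executor = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
  return executor.submit(() -> {
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
      conn.setDoOutput(true);
      conn.setRequestMethod(method);
      // Chunked streaming keeps the handler's ContentConsumer active while the latch is held
      conn.setChunkedStreamingMode(16);
      try (OutputStream os = conn.getOutputStream()) {
        os.write("hello".getBytes(StandardCharsets.UTF_8));
        os.flush();
        // Hold the upload open until the test counts down the latch
        latch.await();
        os.write("world".getBytes(StandardCharsets.UTF_8));
      }
      return conn.getResponseCode();
    } finally {
      conn.disconnect();
    }
  });
}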
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class FileUploadServiceTestRun, method testFileUploadService.
@Test
public void testFileUploadService() throws Exception {
  ApplicationManager appManager = deployApplication(FileUploadApp.class);
  // Start the service
  ServiceManager serviceManager = appManager.getServiceManager(FileUploadApp.SERVICE_NAME).start();
  serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  try {
    // The upload URL is "base/upload/pfs/[partition_value]", where the partition value is a long
    URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();
    // Upload with a wrong MD5; should get 400
    byte[] content = Strings.repeat("0123456789 ", 100).getBytes(Charsets.UTF_8);
    Assert.assertEquals(HttpURLConnection.HTTP_BAD_REQUEST,
                        upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, "123", 30));
    long beforeUploadTime = System.currentTimeMillis();
    // Upload with the right MD5; should get 200
    Assert.assertEquals(HttpURLConnection.HTTP_OK,
                        upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content,
                               Base64.getEncoder().encodeToString(Hashing.md5().hashBytes(content).asBytes()), 20));
    // Inspect the partitioned file set and verify the content
    PartitionedFileSet pfs = (PartitionedFileSet) getDataset(FileUploadApp.PFS_NAME).get();
    PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
    Assert.assertNotNull(partition);
    // Verify that a notification has been published for the new partition
    List<Notification> notifications = getDataNotifications(beforeUploadTime);
    // There should be one message
    Assert.assertEquals(1, notifications.size());
    verifyDataNotification(notifications.get(0), NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME),
                           Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));
    // There should be one file under the partition directory
    List<Location> locations = partition.getLocation().list();
    Assert.assertEquals(1, locations.size());
    Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));
    // Verify the tracking table of chunk sizes
    KeyValueTable trackingTable = (KeyValueTable) getDataset(FileUploadApp.KV_TABLE_NAME).get();
    CloseableIterator<KeyValue<byte[], byte[]>> iter = trackingTable.scan(null, null);
    // Sum up all chunk sizes as tracked by the tracking table
    long sum = 0;
    int iterSize = 0;
    while (iter.hasNext()) {
      KeyValue<byte[], byte[]> kv = iter.next();
      sum += Bytes.toInt(kv.getKey()) * Bytes.toLong(kv.getValue());
      iterSize++;
    }
    // The iterator should have size >= 2, since the two uploads use different chunk sizes
    Assert.assertTrue(iterSize >= 2);
    // The sum of all chunk sizes should equal the content size * 2
    // (one failed upload plus one successful upload)
    Assert.assertEquals(content.length * 2, sum);
  } finally {
    serviceManager.stop();
    serviceManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
  }
}
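The upload helper itself is not included in this snippet. Based on how it is called above (a URL, the content bytes, an MD5 string, and a number that appears to be the chunk size), a minimal sketch is given below; the POST method, the Content-MD5 header name, and the use of chunked streaming are assumptions for illustration only, not the actual FileUploadServiceTestRun code.

// Hypothetical upload helper: sends the content in chunks of the given size, attaches the
// provided digest as a Content-MD5 header, and returns the HTTP response code.
private int upload(URL url, byte[] content, String md5, int chunkSize) throws IOException {
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    conn.setDoOutput(true);
    conn.setRequestMethod("POST");
    conn.setChunkedStreamingMode(chunkSize);
    conn.setRequestProperty("Content-MD5", md5);
    try (OutputStream os = conn.getOutputStream()) {
      os.write(content);
    }
    return conn.getResponseCode();
  } finally {
    conn.disconnect();
  }
}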