Search in sources :

Example 1 with KeyValue

use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.

the class SparkTestRun method validateGetDatasetOutput.

private void validateGetDatasetOutput(KeyValueTable logStatsTable) {
    SparkAppUsingGetDataset.LogKey fredKey1 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 200);
    SparkAppUsingGetDataset.LogKey fredKey2 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 404);
    SparkAppUsingGetDataset.LogKey bradKey1 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 200);
    SparkAppUsingGetDataset.LogKey bradKey2 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 404);
    SparkAppUsingGetDataset.LogStats fredStats1 = new SparkAppUsingGetDataset.LogStats(2, 100);
    SparkAppUsingGetDataset.LogStats fredStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
    SparkAppUsingGetDataset.LogStats bradStats1 = new SparkAppUsingGetDataset.LogStats(1, 50);
    SparkAppUsingGetDataset.LogStats bradStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
    Map<SparkAppUsingGetDataset.LogKey, SparkAppUsingGetDataset.LogStats> expected = ImmutableMap.of(fredKey1, fredStats1, fredKey2, fredStats2, bradKey1, bradStats1, bradKey2, bradStats2);
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
        // must have 4 records
        for (int i = 0; i < 4; i++) {
            Assert.assertTrue("Expected next for i = " + i, scan.hasNext());
            KeyValue<byte[], byte[]> next = scan.next();
            SparkAppUsingGetDataset.LogKey logKey = new Gson().fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
            SparkAppUsingGetDataset.LogStats logStats = new Gson().fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
            Assert.assertEquals(expected.get(logKey), logStats);
        }
        // no more records
        Assert.assertFalse(scan.hasNext());
    }
}
Also used : KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) SparkAppUsingGetDataset(co.cask.cdap.spark.app.SparkAppUsingGetDataset) Gson(com.google.gson.Gson)

Example 2 with KeyValue

use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.

the class SparkTestRun method testSparkWithGetDataset.

private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
    ApplicationManager applicationManager = deploy(appClass);
    DataSetManager<FileSet> filesetManager = getDataset("logs");
    FileSet fileset = filesetManager.get();
    Location location = fileset.getLocation("nn");
    prepareInputFileSetWithLogData(location);
    Map<String, String> inputArgs = new HashMap<>();
    FileSetArguments.setInputPath(inputArgs, "nn");
    Map<String, String> args = new HashMap<>();
    args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
    args.put("input", "logs");
    args.put("output", "logStats");
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
    KeyValueTable logStatsTable = logStatsManager.get();
    validateGetDatasetOutput(logStatsTable);
    // Cleanup after run
    location.delete(true);
    logStatsManager.flush();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
        while (scan.hasNext()) {
            logStatsTable.delete(scan.next().getKey());
        }
    }
    logStatsManager.flush();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) FileSet(co.cask.cdap.api.dataset.lib.FileSet) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Location(org.apache.twill.filesystem.Location)

Example 3 with KeyValue

use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.

the class SparkTest method validateGetDatasetOutput.

private void validateGetDatasetOutput(KeyValueTable logStatsTable) {
    SparkAppUsingGetDataset.LogKey fredKey1 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 200);
    SparkAppUsingGetDataset.LogKey fredKey2 = new SparkAppUsingGetDataset.LogKey("10.10.10.10", "FRED", "GET http://bar.com/image.jpg HTTP/1.1", 404);
    SparkAppUsingGetDataset.LogKey bradKey1 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 200);
    SparkAppUsingGetDataset.LogKey bradKey2 = new SparkAppUsingGetDataset.LogKey("20.20.20.20", "BRAD", "GET http://bar.com/image.jpg HTTP/1.1", 404);
    SparkAppUsingGetDataset.LogStats fredStats1 = new SparkAppUsingGetDataset.LogStats(2, 100);
    SparkAppUsingGetDataset.LogStats fredStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
    SparkAppUsingGetDataset.LogStats bradStats1 = new SparkAppUsingGetDataset.LogStats(1, 50);
    SparkAppUsingGetDataset.LogStats bradStats2 = new SparkAppUsingGetDataset.LogStats(1, 50);
    Map<SparkAppUsingGetDataset.LogKey, SparkAppUsingGetDataset.LogStats> expected = ImmutableMap.of(fredKey1, fredStats1, fredKey2, fredStats2, bradKey1, bradStats1, bradKey2, bradStats2);
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
        // must have 4 records
        for (int i = 0; i < 4; i++) {
            Assert.assertTrue("Expected next for i = " + i, scan.hasNext());
            KeyValue<byte[], byte[]> next = scan.next();
            SparkAppUsingGetDataset.LogKey logKey = new Gson().fromJson(Bytes.toString(next.getKey()), SparkAppUsingGetDataset.LogKey.class);
            SparkAppUsingGetDataset.LogStats logStats = new Gson().fromJson(Bytes.toString(next.getValue()), SparkAppUsingGetDataset.LogStats.class);
            Assert.assertEquals(expected.get(logKey), logStats);
        }
        // no more records
        Assert.assertFalse(scan.hasNext());
    }
}
Also used : KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) SparkAppUsingGetDataset(co.cask.cdap.spark.app.SparkAppUsingGetDataset) Gson(com.google.gson.Gson)

Example 4 with KeyValue

use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.

the class FileUploadServiceTestRun method testFileUploadService.

@Test
public void testFileUploadService() throws Exception {
    ApplicationManager appManager = deployApplication(FileUploadApp.class);
    // Start the service
    ServiceManager serviceManager = appManager.getServiceManager(FileUploadApp.SERVICE_NAME).start();
    serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    try {
        // Upload URL is "base/upload/pfs/[partition_value], which the partition value is a long
        URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();
        // Upload with wrong MD5, should get 400.
        byte[] content = Strings.repeat("0123456789 ", 100).getBytes(Charsets.UTF_8);
        Assert.assertEquals(HttpURLConnection.HTTP_BAD_REQUEST, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, "123", 30));
        long beforeUploadTime = System.currentTimeMillis();
        // Upload with right MD5, should get 200
        Assert.assertEquals(HttpURLConnection.HTTP_OK, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, BaseEncoding.base64().encode(Hashing.md5().hashBytes(content).asBytes()), 20));
        // Inspect the partitioned file set and verify the content
        PartitionedFileSet pfs = (PartitionedFileSet) getDataset(FileUploadApp.PFS_NAME).get();
        PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
        Assert.assertNotNull(partition);
        // Verify a notification should have been published for the new partition
        List<Notification> notifications = getDataNotifications(beforeUploadTime);
        // Should have one message
        Assert.assertEquals(1, notifications.size());
        verifyDataNotification(notifications.get(0), NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME), Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));
        // There should be one file under the partition directory
        List<Location> locations = partition.getLocation().list();
        Assert.assertEquals(1, locations.size());
        Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));
        // Verify the tracking table of chunks sizes
        KeyValueTable trackingTable = (KeyValueTable) getDataset(FileUploadApp.KV_TABLE_NAME).get();
        CloseableIterator<KeyValue<byte[], byte[]>> iter = trackingTable.scan(null, null);
        // Sum up all chunks sizes as being tracked by the tracking table.
        long sum = 0;
        int iterSize = 0;
        while (iter.hasNext()) {
            KeyValue<byte[], byte[]> kv = iter.next();
            sum += Bytes.toInt(kv.getKey()) * Bytes.toLong(kv.getValue());
            iterSize++;
        }
        // The iterator should have size >= 2, since we uses different chunk size for two different upload
        Assert.assertTrue(iterSize >= 2);
        // The sum of all chunks sizes should be the same as the
        // content size * 2 (since we have one failure, one success upload)
        Assert.assertEquals(content.length * 2, sum);
    } finally {
        serviceManager.stop();
        serviceManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) URI(java.net.URI) Notification(co.cask.cdap.proto.Notification) ServiceManager(co.cask.cdap.test.ServiceManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 5 with KeyValue

use of co.cask.cdap.api.dataset.lib.KeyValue in project cdap by caskdata.

the class SparkTestRun method testTransaction.

@Test
public void testTransaction() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    StreamManager streamManager = getStreamManager("SparkStream");
    // Write some sentences to the stream
    streamManager.send("red fox");
    streamManager.send("brown fox");
    streamManager.send("grey fox");
    streamManager.send("brown bear");
    streamManager.send("black bear");
    // Run the spark program
    SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of("source.stream", "SparkStream", "keyvalue.table", "KeyValueTable", "result.all.dataset", "SparkResult", "result.threshold", "2", "result.threshold.dataset", "SparkThresholdResult"));
    // Verify result from dataset before the Spark program terminates
    final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
    final KeyValueTable resultTable = resultManager.get();
    // Expect the threshold result dataset, with threshold >=2, contains [brown, fox, bear]
    Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {

        @Override
        public Set<String> call() throws Exception {
            // This is to start a new TX
            resultManager.flush();
            LOG.info("Reading from threshold result");
            try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
                return ImmutableSet.copyOf(Iterators.transform(itor, new Function<KeyValue<byte[], byte[]>, String>() {

                    @Override
                    public String apply(KeyValue<byte[], byte[]> input) {
                        String word = Bytes.toString(input.getKey());
                        LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
                        return word;
                    }
                }));
            }
        }
    }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    sparkManager.stop();
    sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) SparkManager(co.cask.cdap.test.SparkManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileSet(co.cask.cdap.api.dataset.lib.FileSet) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) IOException(java.io.IOException) TransactionSpark(co.cask.cdap.spark.app.TransactionSpark) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Aggregations

KeyValue (co.cask.cdap.api.dataset.lib.KeyValue)14 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)10 ApplicationManager (co.cask.cdap.test.ApplicationManager)7 SparkManager (co.cask.cdap.test.SparkManager)5 Test (org.junit.Test)4 CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator)3 FileSet (co.cask.cdap.api.dataset.lib.FileSet)3 IOException (java.io.IOException)3 URI (java.net.URI)3 HashMap (java.util.HashMap)3 Location (org.apache.twill.filesystem.Location)3 SparkAppUsingGetDataset (co.cask.cdap.spark.app.SparkAppUsingGetDataset)2 Gson (com.google.gson.Gson)2 ArrayList (java.util.ArrayList)2 IdentityHashMap (java.util.IdentityHashMap)2 TxRunnable (co.cask.cdap.api.TxRunnable)1 DatasetContext (co.cask.cdap.api.data.DatasetContext)1 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)1 ObjectMappedTable (co.cask.cdap.api.dataset.lib.ObjectMappedTable)1 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)1