
Example 26 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class Spark2Test, method checkOutputData:

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
    KeyValueTable count = manager.get();
    // Read the output and verify that each test string maps to its length
    byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
    Assert.assertNotNull(val);
    // JUnit convention: expected value first, actual value second
    Assert.assertEquals(TEST_STRING_1.length(), Bytes.toInt(val));
    val = count.read(Bytes.toBytes(TEST_STRING_2));
    Assert.assertNotNull(val);
    Assert.assertEquals(TEST_STRING_2.length(), Bytes.toInt(val));
}
Also used: KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)
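
For context, the data verified above could have been produced by the matching write calls. Below is a minimal, hypothetical sketch, not the actual program under test; it uses only the KeyValueTable.write and Bytes helpers that appear in these examples (the method name is an assumption).

import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;

// Hypothetical writer for the data that checkOutputData() verifies:
// each string is keyed by its UTF-8 bytes and mapped to its length.
static void writeLengths(KeyValueTable counts, String... strings) {
    for (String s : strings) {
        counts.write(Bytes.toBytes(s), Bytes.toBytes(s.length()));
    }
}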

Example 27 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkLogParser, method run:

@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
    JavaSparkContext jsc = new JavaSparkContext();
    Map<String, String> runtimeArguments = sec.getRuntimeArguments();
    String inputFileSet = runtimeArguments.get("input");
    final String outputTable = runtimeArguments.get("output");
    JavaPairRDD<LongWritable, Text> input = sec.fromDataset(inputFileSet);
    final JavaPairRDD<String, String> aggregated = input.mapToPair(new PairFunction<Tuple2<LongWritable, Text>, LogKey, LogStats>() {

        @Override
        public Tuple2<LogKey, LogStats> call(Tuple2<LongWritable, Text> input) throws Exception {
            return SparkAppUsingGetDataset.parse(input._2());
        }
    }).reduceByKey(new Function2<LogStats, LogStats, LogStats>() {

        @Override
        public LogStats call(LogStats stats1, LogStats stats2) throws Exception {
            return stats1.aggregate(stats2);
        }
    }).mapPartitionsToPair(new PairFlatMapFunction<Iterator<Tuple2<LogKey, LogStats>>, String, String>() {

        @Override
        public Iterator<Tuple2<String, String>> call(Iterator<Tuple2<LogKey, LogStats>> itor) throws Exception {
            final Gson gson = new Gson();
            return Lists.newArrayList(Iterators.transform(itor, new Function<Tuple2<LogKey, LogStats>, Tuple2<String, String>>() {

                @Override
                public Tuple2<String, String> apply(Tuple2<LogKey, LogStats> input) {
                    return new Tuple2<>(gson.toJson(input._1()), gson.toJson(input._2()));
                }
            })).iterator();
        }
    });
    // Collect all data to the driver and write to the dataset directly. That's the intent of the test.
    sec.execute(new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            KeyValueTable kvTable = context.getDataset(outputTable);
            for (Map.Entry<String, String> entry : aggregated.collectAsMap().entrySet()) {
                kvTable.write(entry.getKey(), entry.getValue());
            }
        }
    });
}
Also used: LogKey (io.cdap.cdap.spark.app.SparkAppUsingGetDataset.LogKey), Gson (com.google.gson.Gson), Text (org.apache.hadoop.io.Text), Function2 (org.apache.spark.api.java.function.Function2), LogStats (io.cdap.cdap.spark.app.SparkAppUsingGetDataset.LogStats), Function (com.google.common.base.Function), PairFlatMapFunction (org.apache.spark.api.java.function.PairFlatMapFunction), PairFunction (org.apache.spark.api.java.function.PairFunction), Tuple2 (scala.Tuple2), TxRunnable (io.cdap.cdap.api.TxRunnable), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Iterator (java.util.Iterator), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), LongWritable (org.apache.hadoop.io.LongWritable), DatasetContext (io.cdap.cdap.api.data.DatasetContext)
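
The transactional write at the end of this example is the core pattern: sec.execute(TxRunnable) runs its body in a single transaction, and the DatasetContext it receives hands out dataset instances. Since TxRunnable declares a single run(DatasetContext) method, the same pattern can be written compactly with a lambda. A minimal sketch under that assumption (the helper name and throws clause are illustrative, not from the original):

import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.api.spark.JavaSparkExecutionContext;
import java.util.Map;

// Minimal sketch: write a collected result map to a KeyValueTable in one transaction.
// Assumes TxRunnable can be used as a lambda (it has a single abstract method).
static void writeResults(JavaSparkExecutionContext sec, String outputTable,
                         Map<String, String> results) throws Exception {
    sec.execute(context -> {
        // getDataset is called inside the transaction started by execute()
        KeyValueTable kvTable = context.getDataset(outputTable);
        for (Map.Entry<String, String> entry : results.entrySet()) {
            kvTable.write(entry.getKey(), entry.getValue());
        }
    });
}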

Example 28 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkServiceIntegrationTestRun, method testSparkWithService:

@Test
public void testSparkWithService() throws Exception {
    ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
    startService(applicationManager);
    SparkManager sparkManager = applicationManager.getSparkManager(TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
    DataSetManager<KeyValueTable> datasetManager = getDataset("result");
    KeyValueTable results = datasetManager.get();
    for (int i = 1; i <= 5; i++) {
        byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
        Assert.assertEquals((i * i), Integer.parseInt(Bytes.toString(results.read(key))));
    }
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), SparkManager (io.cdap.cdap.test.SparkManager), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Test (org.junit.Test)
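
The loop above implies the Spark program stored, for each key 1 through 5, its square rendered as a decimal string. A hypothetical sketch of that writer (the method name is an assumption; only KeyValueTable.write(String, String) is taken from these examples):

import io.cdap.cdap.api.dataset.lib.KeyValueTable;

// Hypothetical producer of the "result" dataset checked by testSparkWithService:
// the square is stored as a decimal string, matching the Integer.parseInt read-back.
static void writeSquares(KeyValueTable results) {
    for (int i = 1; i <= 5; i++) {
        results.write(String.valueOf(i), String.valueOf(i * i));
    }
}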

Example 29 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class ServiceLifeCycleTestRun, method testContentConsumerProducerLifecycle:

@Test
public void testContentConsumerProducerLifecycle() throws Exception {
    try {
        ApplicationManager appManager = deployWithArtifact(ServiceLifecycleApp.class, artifactJar);
        // Run the service with a single thread to test context capture and release
        serviceManager = appManager.getServiceManager("test").start(ImmutableMap.of(SystemArguments.SERVICE_THREADS, "1"));
        final DataSetManager<KeyValueTable> datasetManager = getDataset(ServiceLifecycleApp.HANDLER_TABLE_NAME);
        // Clean up the dataset first to avoid being affected by other tests
        datasetManager.get().delete(Bytes.toBytes("called"));
        datasetManager.get().delete(Bytes.toBytes("completed"));
        datasetManager.flush();
        CountDownLatch uploadLatch = new CountDownLatch(1);
        // Create five concurrent uploads
        List<ListenableFuture<Integer>> completions = new ArrayList<>();
        for (int i = 0; i < 5; i++) {
            completions.add(slowUpload(serviceManager, "POST", "uploadDownload", uploadLatch));
        }
        // Get the states; there should be six handler instances initialized:
        // five for the in-progress uploads, one for the getStates call
        Tasks.waitFor(6, () -> getStates(serviceManager).size(), 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
        // Complete the upload
        uploadLatch.countDown();
        // Make sure the download through content producer has started
        Tasks.waitFor(true, () -> {
            byte[] value = datasetManager.get().read("called");
            datasetManager.flush();
            return value != null && value.length == Bytes.SIZEOF_LONG && Bytes.toLong(value) > 5;
        }, 10L, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
        // Get the states; there should still be six handler instances, since the ContentConsumer should
        // pass its captured context to the ContentProducer without creating a new one.
        Multimap<Integer, String> states = getStates(serviceManager);
        Assert.assertEquals(6, states.size());
        // Set the complete flag in the dataset
        datasetManager.get().write("completed", Bytes.toBytes(true));
        datasetManager.flush();
        // Wait for completion
        Futures.successfulAsList(completions).get(10, TimeUnit.SECONDS);
        // Verify the upload result
        for (ListenableFuture<Integer> future : completions) {
            Assert.assertEquals(200, future.get().intValue());
        }
        // Get the states again; they should still be the same six instances
        Assert.assertEquals(states, getStates(serviceManager));
    } finally {
        // Guard against a null serviceManager if deployment failed before it was assigned
        if (serviceManager != null) {
            serviceManager.stop();
            serviceManager.waitForStopped(10, TimeUnit.SECONDS);
        }
    }
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), ArrayList (java.util.ArrayList), CountDownLatch (java.util.concurrent.CountDownLatch), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), Test (org.junit.Test)
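
The test coordinates with the service through two entries in the handler table: a "called" counter (a long written on the producer side) and a "completed" flag the test sets to release the producer. A minimal sketch of the polling predicate used above, pulled out as a helper (the helper name is an assumption; the read/flush calls are the ones shown in this example):

import io.cdap.cdap.api.common.Bytes;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;
import io.cdap.cdap.test.DataSetManager;

// Hypothetical helper mirroring the Tasks.waitFor predicate above: re-read the
// dataset and report whether the "called" counter has passed the expected count.
static boolean calledMoreThan(DataSetManager<KeyValueTable> manager, long expected) {
    byte[] value = manager.get().read("called");
    // flush() makes the manager pick up changes committed by other transactions
    manager.flush();
    return value != null && value.length == Bytes.SIZEOF_LONG && Bytes.toLong(value) > expected;
}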

Example 30 with KeyValueTable

Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class FileUploadServiceTestRun, method testFileUploadService:

@Test
public void testFileUploadService() throws Exception {
    ApplicationManager appManager = deployApplication(FileUploadApp.class);
    // Start the service
    ServiceManager serviceManager = appManager.getServiceManager(FileUploadApp.SERVICE_NAME).start();
    serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    try {
        // Upload URL is "base/upload/pfs/[partition_value]", where the partition value is a long
        URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();
        // Upload with wrong MD5, should get 400.
        byte[] content = Strings.repeat("0123456789 ", 100).getBytes(Charsets.UTF_8);
        Assert.assertEquals(HttpURLConnection.HTTP_BAD_REQUEST, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, "123", 30));
        long beforeUploadTime = System.currentTimeMillis();
        // Upload with right MD5, should get 200
        Assert.assertEquals(HttpURLConnection.HTTP_OK, upload(serviceURI.resolve("upload/" + FileUploadApp.PFS_NAME + "/1").toURL(), content, Base64.getEncoder().encodeToString(Hashing.md5().hashBytes(content).asBytes()), 20));
        // Inspect the partitioned file set and verify the content
        PartitionedFileSet pfs = (PartitionedFileSet) getDataset(FileUploadApp.PFS_NAME).get();
        PartitionDetail partition = pfs.getPartition(PartitionKey.builder().addLongField("time", 1).build());
        Assert.assertNotNull(partition);
        // Verify that a notification was published for the new partition
        List<Notification> notifications = getDataNotifications(beforeUploadTime);
        // Should have one message
        Assert.assertEquals(1, notifications.size());
        verifyDataNotification(notifications.get(0), NamespaceId.DEFAULT.dataset(FileUploadApp.PFS_NAME), Collections.singletonList(PartitionKey.builder().addLongField("time", 1L).build()));
        // There should be one file under the partition directory
        List<Location> locations = partition.getLocation().list();
        Assert.assertEquals(1, locations.size());
        Assert.assertArrayEquals(content, ByteStreams.toByteArray(Locations.newInputSupplier(locations.get(0))));
        // Verify the tracking table of chunk sizes
        KeyValueTable trackingTable = (KeyValueTable) getDataset(FileUploadApp.KV_TABLE_NAME).get();
        // Sum up all chunk sizes tracked by the table; try-with-resources closes the scanner
        long sum = 0;
        int iterSize = 0;
        try (CloseableIterator<KeyValue<byte[], byte[]>> iter = trackingTable.scan(null, null)) {
            while (iter.hasNext()) {
                KeyValue<byte[], byte[]> kv = iter.next();
                sum += Bytes.toInt(kv.getKey()) * Bytes.toLong(kv.getValue());
                iterSize++;
            }
        }
        // The iterator should have size >= 2, since we use different chunk sizes for the two uploads
        Assert.assertTrue(iterSize >= 2);
        // The sum of all chunk sizes should equal the content size * 2
        // (one failed and one successful upload)
        Assert.assertEquals(content.length * 2, sum);
    } finally {
        serviceManager.stop();
        serviceManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
    }
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail), URI (java.net.URI), Notification (io.cdap.cdap.proto.Notification), ServiceManager (io.cdap.cdap.test.ServiceManager), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)
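
The scan at the end of this test shows the general pattern for iterating a KeyValueTable range: scan(null, null) covers the whole table and returns a CloseableIterator that should be closed when done. A small self-contained sketch of that pattern (the helper name and the CloseableIterator import path are assumptions):

import io.cdap.cdap.api.dataset.lib.CloseableIterator;
import io.cdap.cdap.api.dataset.lib.KeyValue;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;

// Hypothetical helper: count the entries in a key range of a KeyValueTable.
// A null bound means "unbounded" on that side, as in the scan(null, null) above.
static int countEntries(KeyValueTable table, byte[] startKey, byte[] stopKey) {
    int count = 0;
    // try-with-resources ensures the scanner is closed even on failure
    try (CloseableIterator<KeyValue<byte[], byte[]>> iter = table.scan(startKey, stopKey)) {
        while (iter.hasNext()) {
            iter.next();
            count++;
        }
    }
    return count;
}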

Aggregations

KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 122
Test (org.junit.Test): 65
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 59
HashMap (java.util.HashMap): 27
SparkManager (io.cdap.cdap.test.SparkManager): 26
Table (io.cdap.cdap.api.dataset.table.Table): 21
TransactionExecutor (org.apache.tephra.TransactionExecutor): 20
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 19
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 18
ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 18
KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue): 14
ServiceManager (io.cdap.cdap.test.ServiceManager): 14
IOException (java.io.IOException): 14
ArrayList (java.util.ArrayList): 14
Location (org.apache.twill.filesystem.Location): 14
ImmutableMap (com.google.common.collect.ImmutableMap): 13
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 13
File (java.io.File): 13
URL (java.net.URL): 12