Search in sources :

Example 41 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.

the class Spark2Test method testScalaSparkCrossNSDataset.

@Test
public void testScalaSparkCrossNSDataset() throws Exception {
    // Deploy and create a dataset in namespace datasetSpaceForSpark
    NamespaceMeta inputDSNSMeta = new NamespaceMeta.Builder().setName("datasetSpaceForSpark").build();
    getNamespaceAdmin().create(inputDSNSMeta);
    deploy(inputDSNSMeta.getNamespaceId(), SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset(inputDSNSMeta.getNamespaceId().dataset("keys"));
    prepareInputData(keysManager);
    Map<String, String> args = ImmutableMap.of(ScalaCharCountProgram.INPUT_DATASET_NAMESPACE(), inputDSNSMeta.getNamespaceId().getNamespace(), ScalaCharCountProgram.INPUT_DATASET_NAME(), "keys");
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start(args);
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    sparkManager.waitForStopped(60, TimeUnit.SECONDS);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
}
Also used : ObjectStore(io.cdap.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(io.cdap.cdap.spark.app.SparkAppUsingObjectStore) ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) NamespaceMeta(io.cdap.cdap.proto.NamespaceMeta) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Example 42 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.

the class Spark2Test method testSparkWithLocalFiles.

private void testSparkWithLocalFiles(Class<? extends Application> appClass, String sparkProgram, String prefix) throws Exception {
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, appClass);
    URI localFile = createLocalPropertiesFile(prefix);
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    sparkManager.waitForStopped(120, TimeUnit.SECONDS);
    DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
    KeyValueTable kvTable = kvTableManager.get();
    Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
    List<byte[]> deleteKeys = new ArrayList<>();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
        for (int i = 0; i < 3; i++) {
            KeyValue<byte[], byte[]> next = scan.next();
            Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
            deleteKeys.add(next.getKey());
        }
        Assert.assertFalse(scan.hasNext());
    }
    // Cleanup after run
    kvTableManager.flush();
    for (byte[] key : deleteKeys) {
        kvTable.delete(key);
    }
    kvTableManager.flush();
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) KeyValue(io.cdap.cdap.api.dataset.lib.KeyValue) ArrayList(java.util.ArrayList) URI(java.net.URI) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable)

Example 43 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.

the class Spark2Test method testScalaSparkCrossNSStream.

@Test
public void testScalaSparkCrossNSStream() throws Exception {
    // create a namespace for input and create a file set instance
    NamespaceMeta inputNSMeta = new NamespaceMeta.Builder().setName("inputSpaceForSpark").build();
    getNamespaceAdmin().create(inputNSMeta);
    DatasetId inputDatasetId = inputNSMeta.getNamespaceId().dataset("input");
    addDatasetInstance(FileSet.class.getName(), inputDatasetId, FileSetProperties.builder().setInputFormat(TextInputFormat.class).build());
    // create a namespace for dataset and add the dataset instance in it
    NamespaceMeta outputNSMeta = new NamespaceMeta.Builder().setName("crossNSDataset").build();
    getNamespaceAdmin().create(outputNSMeta);
    addDatasetInstance(outputNSMeta.getNamespaceId().dataset("count"), "keyValueTable");
    // write something to the input dataset
    Location inputFile = this.<FileSet>getDataset(inputDatasetId).get().getLocation("inputFile");
    try (PrintStream printer = new PrintStream(inputFile.getOutputStream(), true, "UTF-8")) {
        for (int i = 0; i < 50; i++) {
            printer.println(String.valueOf(i));
        }
    }
    // deploy the spark app in another namespace (default)
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    Map<String, String> args = new HashMap<>();
    args.put(ScalaCrossNSProgram.INPUT_NAMESPACE(), inputNSMeta.getNamespaceId().getNamespace());
    args.put(ScalaCrossNSProgram.OUTPUT_NAMESPACE(), outputNSMeta.getNamespaceId().getNamespace());
    args.put(ScalaCrossNSProgram.OUTPUT_NAME(), "count");
    FileSetArguments.setInputPath(args, "inputFile");
    SparkManager sparkManager = applicationManager.getSparkManager(ScalaCrossNSProgram.class.getSimpleName()).start(args);
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    sparkManager.waitForStopped(60, TimeUnit.SECONDS);
    // get the dataset from the other namespace where we expect it to exist and compare the data
    DataSetManager<KeyValueTable> countManager = getDataset(outputNSMeta.getNamespaceId().dataset("count"));
    KeyValueTable results = countManager.get();
    for (int i = 0; i < 50; i++) {
        byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
        Assert.assertArrayEquals(key, results.read(key));
    }
}
Also used : PrintStream(java.io.PrintStream) ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) DatasetId(io.cdap.cdap.proto.id.DatasetId) NamespaceMeta(io.cdap.cdap.proto.NamespaceMeta) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 44 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.

the class Spark2Test method testSparkWithObjectStore.

@Test
public void testSparkWithObjectStore() throws Exception {
    ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
    prepareInputData(keysManager);
    SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    sparkManager.waitForStopped(60, TimeUnit.SECONDS);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
    // validate that the table emitted metrics
    // one read + one write in beforeSubmit(), increment (= read + write) in main -> 4
    Tasks.waitFor(4L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE, "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM, ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(), Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(), Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(), Constants.Metrics.Tag.DATASET, "totals"), Collections.<String>emptyList()));
            if (metrics.isEmpty()) {
                return 0L;
            }
            Assert.assertEquals(1, metrics.size());
            MetricTimeSeries ts = metrics.iterator().next();
            Assert.assertEquals(1, ts.getTimeValues().size());
            return ts.getTimeValues().get(0).getValue();
        }
    }, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) ObjectStore(io.cdap.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(io.cdap.cdap.spark.app.SparkAppUsingObjectStore) SparkManager(io.cdap.cdap.test.SparkManager) MetricTimeSeries(io.cdap.cdap.api.metrics.MetricTimeSeries) Assume.assumeNoException(org.junit.Assume.assumeNoException) IOException(java.io.IOException) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Collection(java.util.Collection) MetricDataQuery(io.cdap.cdap.api.metrics.MetricDataQuery) Test(org.junit.Test)

Example 45 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.

the class Spark2Test method checkOutputData.

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
    KeyValueTable count = manager.get();
    // read output and verify result
    byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
    Assert.assertTrue(val != null);
    Assert.assertEquals(Bytes.toInt(val), TEST_STRING_1.length());
    val = count.read(Bytes.toBytes(TEST_STRING_2));
    Assert.assertTrue(val != null);
    Assert.assertEquals(Bytes.toInt(val), TEST_STRING_2.length());
}
Also used : KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable)

Aggregations

KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)122 Test (org.junit.Test)65 ApplicationManager (io.cdap.cdap.test.ApplicationManager)59 HashMap (java.util.HashMap)27 SparkManager (io.cdap.cdap.test.SparkManager)26 Table (io.cdap.cdap.api.dataset.table.Table)21 TransactionExecutor (org.apache.tephra.TransactionExecutor)20 WorkflowManager (io.cdap.cdap.test.WorkflowManager)19 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)18 ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms)18 KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue)14 ServiceManager (io.cdap.cdap.test.ServiceManager)14 IOException (java.io.IOException)14 ArrayList (java.util.ArrayList)14 Location (org.apache.twill.filesystem.Location)14 ImmutableMap (com.google.common.collect.ImmutableMap)13 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)13 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)13 File (java.io.File)13 URL (java.net.URL)12