Search in sources :

Example 56 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class SparkTestRun method testDynamicSpark.

/**
 * Sends ten events ("Line 1" .. "Line 10") to the {@code SparkStream} stream, runs the
 * {@code ScalaDynamicSpark} program, and checks the values it leaves in {@code ResultTable}:
 * 10 under the key "Line" and 1 under each of the keys "1" .. "10".
 */
@Test
public void testDynamicSpark() throws Exception {
    final int eventCount = 10;
    ApplicationManager appManager = deploy(TestSparkApp.class);

    // Feed the input stream with one event per line number (1-based)
    StreamManager streamManager = getStreamManager("SparkStream");
    for (int lineNo = 1; lineNo <= eventCount; lineNo++) {
        streamManager.send("Line " + lineNo);
    }

    // Launch the Spark program and wait until the run completes
    SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
    Map<String, String> runtimeArgs = ImmutableMap.of(
        "input", "SparkStream",
        "output", "ResultTable",
        "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath());
    sparkManager.start(runtimeArgs);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // Inspect what the program wrote to the output dataset
    KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();

    // The word "Line" occurs once per event
    Assert.assertEquals(eventCount, Bytes.toInt(resultTable.read("Line")));

    // Every line number occurs exactly once
    for (int lineNo = 1; lineNo <= eventCount; lineNo++) {
        Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(lineNo))));
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ScalaDynamicSpark(co.cask.cdap.spark.app.ScalaDynamicSpark) Test(org.junit.Test)

Example 57 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class SparkTestRun method testSparkWithLocalFiles.

/**
 * Deploys {@code appClass}, runs {@code sparkProgram} with a local properties file supplied via
 * the {@code LOCAL_FILE_RUNTIME_ARG} runtime argument, and verifies that the output dataset
 * contains exactly three entries matching a=1, b=2, c=3. The verified entries are deleted
 * afterwards.
 *
 * @param appClass     application class to deploy
 * @param sparkProgram name of the Spark program to run
 * @param prefix       passed to {@code createLocalPropertiesFile} when creating the local file
 * @throws Exception if deployment, the program run, or dataset access fails
 */
private void testSparkWithLocalFiles(Class<? extends Application> appClass, String sparkProgram, String prefix) throws Exception {
    ApplicationManager applicationManager = deploy(appClass);
    URI localFile = createLocalPropertiesFile(prefix);

    Map<String, String> runtimeArgs =
        Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString());
    SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(runtimeArgs);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

    DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
    KeyValueTable kvTable = kvTableManager.get();

    Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
    List<byte[]> seenKeys = new ArrayList<>();
    try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
        // Pull one entry per expected pair; each must match the expected value for its key
        for (int remaining = expected.size(); remaining > 0; remaining--) {
            KeyValue<byte[], byte[]> entry = scan.next();
            String key = Bytes.toString(entry.getKey());
            String value = Bytes.toString(entry.getValue());
            Assert.assertEquals(expected.get(key), value);
            seenKeys.add(entry.getKey());
        }
        // Nothing beyond the expected entries may be present
        Assert.assertFalse(scan.hasNext());
    }

    // Cleanup after run: remove every entry that was just verified
    kvTableManager.flush();
    for (byte[] seenKey : seenKeys) {
        kvTable.delete(seenKey);
    }
    kvTableManager.flush();
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) ArrayList(java.util.ArrayList) URI(java.net.URI) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable)

Example 58 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class SparkStreamIntegrationTestRun method testSparkCrossNS.

/**
 * Exercises cross-namespace access from Spark programs:
 * <ul>
 *   <li>{@code TestSparkStreamIntegrationApp} is deployed in the default namespace, is pointed at
 *       the stream {@code testStream} in {@code streamNS}, and its {@code result} dataset in the
 *       default namespace is verified.</li>
 *   <li>{@code TestSparkCrossNSDatasetApp} is deployed in {@code crossNSDatasetAppNS}, is pointed
 *       at {@code result} in the default namespace as input and {@code finalDataset} in
 *       {@code outputDatasetNS} as output, and that output dataset is verified.</li>
 * </ul>
 */
@Test
public void testSparkCrossNS() throws Exception {
    NamespaceMeta streamNSMeta = new NamespaceMeta.Builder().setName("streamNS").build();
    NamespaceMeta crossNSDatasetAppNS = new NamespaceMeta.Builder().setName("crossNSDatasetAppNS").build();
    NamespaceMeta outputDatasetNS = new NamespaceMeta.Builder().setName("outputDatasetNS").build();

    // Create the three extra namespaces used by this test
    getNamespaceAdmin().create(streamNSMeta);
    getNamespaceAdmin().create(crossNSDatasetAppNS);
    getNamespaceAdmin().create(outputDatasetNS);

    // Pre-create the final output dataset in its own namespace
    NamespaceId outputNamespace = outputDatasetNS.getNamespaceId();
    addDatasetInstance(outputNamespace.dataset("finalDataset"), "keyValueTable");

    // Create the input stream in streamNS and send the numbers 0..49 to it
    NamespaceId streamNamespace = streamNSMeta.getNamespaceId();
    StreamManager streamManager = getStreamManager(streamNamespace.stream("testStream"));
    streamManager.createStream();
    for (int event = 0; event < 50; event++) {
        streamManager.send(Integer.toString(event));
    }

    // deploy TestSparkStreamIntegrationApp in default namespace
    ApplicationManager spark1 = deployApplication(TestSparkStreamIntegrationApp.class);
    Map<String, String> args = ImmutableMap.of(
        TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAMESPACE, streamNamespace.getNamespace(),
        TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAME, "testStream");
    SparkManager sparkManager = spark1.getSparkManager("SparkStreamProgram").start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);

    // Verify the results written in default namespace by spark1
    DataSetManager<KeyValueTable> datasetManager = getDataset("result");
    verifyDatasetResult(datasetManager);

    // deploy the cross-namespace dataset app in the crossNSDatasetAppNS namespace
    ApplicationManager spark2 = deployApplication(crossNSDatasetAppNS.getNamespaceId(), TestSparkCrossNSDatasetApp.class);
    args = ImmutableMap.of(
        TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.INPUT_DATASET_NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
        TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.INPUT_DATASET_NAME, "result",
        TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.OUTPUT_DATASET_NAMESPACE, outputNamespace.getNamespace(),
        TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.OUTPUT_DATASET_NAME, "finalDataset");
    sparkManager = spark2.getSparkManager("SparkCrossNSDatasetProgram").start(args);
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);

    // Verify the results written to finalDataset in outputDatasetNS by spark2
    datasetManager = getDataset(outputNamespace.dataset("finalDataset"));
    verifyDatasetResult(datasetManager);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Example 59 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class TestFrameworkTestRun method testMapperDatasetAccess.

/**
 * Writes a single entry ("hello" -> "world") into {@code table1}, runs the MapReduce program of
 * {@code DatasetWithMRApp} with {@code table1} as input and {@code table2} as output, and hands
 * {@code table2} to {@code verifyMapperJobOutput} for checking.
 */
@Category(SlowTests.class)
@Test
public void testMapperDatasetAccess() throws Exception {
    // Both tables are instances of the "keyValueTable" dataset type
    addDatasetInstance("keyValueTable", "table1");
    addDatasetInstance("keyValueTable", "table2");

    // Seed the input table and flush so the write is persisted before the job runs
    DataSetManager<KeyValueTable> inputManager = getDataset("table1");
    KeyValueTable source = inputManager.get();
    source.write("hello", "world");
    inputManager.flush();

    ApplicationManager appManager = deployApplication(DatasetWithMRApp.class);
    Map<String, String> runtimeArgs = ImmutableMap.of(
        DatasetWithMRApp.INPUT_KEY, "table1",
        DatasetWithMRApp.OUTPUT_KEY, "table2");
    MapReduceManager mrManager = appManager
        .getMapReduceManager(DatasetWithMRApp.MAPREDUCE_PROGRAM)
        .start(runtimeArgs);
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    appManager.stopAll();

    // Check what the job wrote to the output table
    DataSetManager<KeyValueTable> outputManager = getDataset("table2");
    verifyMapperJobOutput(DatasetWithMRApp.class, outputManager);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) MapReduceManager(co.cask.cdap.test.MapReduceManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 60 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class DynamicPartitioningTestRun method testDynamicPartitioningWithFailure.

/**
 * Deploys {@code AppWithDynamicPartitioning} in {@code testSpace}, seeds its {@code input}
 * dataset with three entries, and then invokes {@code testDynamicPartitioningMRWithFailure}
 * for three different combinations of the "pfs1"/"pfs2" arguments.
 */
@Test
public void testDynamicPartitioningWithFailure() throws Exception {
    // deploy app
    ApplicationManager appManager = deployApplication(testSpace, AppWithDynamicPartitioning.class);

    // setup inputs
    DataSetManager<KeyValueTable> inputManager = getDataset(testSpace.dataset("input"));
    KeyValueTable inputTable = inputManager.get();
    for (int row = 0; row < 3; row++) {
        // 'a' + row is an int expression, so the stored values are "97", "98", "99"
        // (decimal code points), not "a", "b", "c".
        // NOTE(review): confirm whether letters were intended before changing this.
        inputTable.write(Integer.toString(row), Integer.toString('a' + row));
    }
    inputManager.flush();

    // run MR with one pfs
    testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1");
    // run MR with two pfs
    testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1", "pfs2");
    // run MR with two pfs in reverse order (because we don't want to rely on which one gets committed first)
    testDynamicPartitioningMRWithFailure(appManager, "pfs2", "pfs1", "pfs2");
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)

Aggregations

KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 69; Test (org.junit.Test): 39; ApplicationManager (co.cask.cdap.test.ApplicationManager): 33; SparkManager (co.cask.cdap.test.SparkManager): 16; IOException (java.io.IOException): 14; StreamManager (co.cask.cdap.test.StreamManager): 13; TransactionExecutor (org.apache.tephra.TransactionExecutor): 11; ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 10; ServiceManager (co.cask.cdap.test.ServiceManager): 9; HashMap (java.util.HashMap): 9; KeyValue (co.cask.cdap.api.dataset.lib.KeyValue): 8; Table (co.cask.cdap.api.dataset.table.Table): 8; WorkflowManager (co.cask.cdap.test.WorkflowManager): 7; ArrayList (java.util.ArrayList): 7; FileSet (co.cask.cdap.api.dataset.lib.FileSet): 6; NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 6; FlowManager (co.cask.cdap.test.FlowManager): 6; MapReduceManager (co.cask.cdap.test.MapReduceManager): 6; Category (org.junit.experimental.categories.Category): 6; Schema (co.cask.cdap.api.data.schema.Schema): 5