use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class SparkTestRun method testDynamicSpark.
@Test
public void testDynamicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  // Populate data into the stream
  StreamManager streamManager = getStreamManager("SparkStream");
  for (int i = 0; i < 10; i++) {
    streamManager.send("Line " + (i + 1));
  }
  SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of("input", "SparkStream",
                                     "output", "ResultTable",
                                     "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath()));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // Validate the result written to the dataset
  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  // The word "Line" should appear ten times
  Assert.assertEquals(10, Bytes.toInt(resultTable.read("Line")));
  // Each number should appear once
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(i + 1))));
  }
}
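The ScalaDynamicSpark program referenced above is not shown here. Based on the assertions, it tokenizes each stream event and stores per-token counts in the ResultTable KeyValueTable, which is why "Line" maps to 10 and each number maps to 1. A minimal sketch of that counting logic in plain Java (not the actual Spark program; the method name is made up for illustration):

  // Hypothetical sketch: count tokens from the stream events and store the counts
  // in a KeyValueTable. The real ScalaDynamicSpark program does this inside a Spark job.
  void countTokens(Iterable<String> lines, KeyValueTable resultTable) {
    Map<String, Integer> counts = new HashMap<>();
    for (String line : lines) {
      for (String token : line.split(" ")) {
        counts.merge(token, 1, Integer::sum);
      }
    }
    for (Map.Entry<String, Integer> entry : counts.entrySet()) {
      // Store each count as a 4-byte int so that Bytes.toInt() can read it back
      resultTable.write(Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
    }
  }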
use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class SparkTestRun method testSparkWithLocalFiles.
private void testSparkWithLocalFiles(Class<? extends Application> appClass,
                                     String sparkProgram, String prefix) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);
  URI localFile = createLocalPropertiesFile(prefix);
  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram)
    .start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
  DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
  KeyValueTable kvTable = kvTableManager.get();
  Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
  List<byte[]> deleteKeys = new ArrayList<>();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
    for (int i = 0; i < 3; i++) {
      KeyValue<byte[], byte[]> next = scan.next();
      Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
      deleteKeys.add(next.getKey());
    }
    Assert.assertFalse(scan.hasNext());
  }
  // Cleanup after run
  kvTableManager.flush();
  for (byte[] key : deleteKeys) {
    kvTable.delete(key);
  }
  kvTableManager.flush();
}
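createLocalPropertiesFile is a private helper of the test class that is not shown. Judging from the expected map above, it presumably writes a small properties file containing a=1, b=2 and c=3 to a temporary location and returns its URI. A sketch under that assumption (the file name and contents are guesses):

  // Hypothetical sketch of the createLocalPropertiesFile helper: write a temporary
  // properties file whose entries match the expected a=1, b=2, c=3 map above.
  private URI createLocalPropertiesFile(String prefix) throws IOException {
    File file = TMP_FOLDER.newFile(prefix + ".properties");
    try (Writer writer = Files.newBufferedWriter(file.toPath(), StandardCharsets.UTF_8)) {
      writer.write("a=1\nb=2\nc=3\n");
    }
    return file.toURI();
  }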
use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class SparkStreamIntegrationTestRun method testSparkCrossNS.
@Test
public void testSparkCrossNS() throws Exception {
  // Test for reading a stream cross-namespace and reading/writing datasets cross-namespace:
  // TestSparkStreamIntegrationApp is deployed in the default namespace; it reads a stream
  // from streamNS and writes to a dataset in its own namespace (default).
  // TestSparkCrossNSDatasetApp is deployed in crossNSDatasetAppNS; it reads from the dataset
  // in default (created by TestSparkStreamIntegrationApp) and writes to a dataset in outputDatasetNS.
  NamespaceMeta streamNSMeta = new NamespaceMeta.Builder().setName("streamNS").build();
  NamespaceMeta crossNSDatasetAppNS = new NamespaceMeta.Builder().setName("crossNSDatasetAppNS").build();
  NamespaceMeta outputDatasetNS = new NamespaceMeta.Builder().setName("outputDatasetNS").build();
  getNamespaceAdmin().create(streamNSMeta);
  getNamespaceAdmin().create(crossNSDatasetAppNS);
  getNamespaceAdmin().create(outputDatasetNS);
  addDatasetInstance(outputDatasetNS.getNamespaceId().dataset("finalDataset"), "keyValueTable");
  StreamManager streamManager = getStreamManager(streamNSMeta.getNamespaceId().stream("testStream"));
  streamManager.createStream();
  for (int i = 0; i < 50; i++) {
    streamManager.send(String.valueOf(i));
  }
  // Deploy TestSparkStreamIntegrationApp in the default namespace
  ApplicationManager spark1 = deployApplication(TestSparkStreamIntegrationApp.class);
  Map<String, String> args = ImmutableMap.of(
    TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAMESPACE, streamNSMeta.getNamespaceId().getNamespace(),
    TestSparkStreamIntegrationApp.SparkStreamProgram.INPUT_STREAM_NAME, "testStream");
  SparkManager sparkManager = spark1.getSparkManager("SparkStreamProgram").start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  // Verify the results written in the default namespace by spark1
  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  verifyDatasetResult(datasetManager);
  // Deploy the cross-namespace dataset app in the crossNSDatasetAppNS namespace
  ApplicationManager spark2 = deployApplication(crossNSDatasetAppNS.getNamespaceId(), TestSparkCrossNSDatasetApp.class);
  args = ImmutableMap.of(
    TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.INPUT_DATASET_NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
    TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.INPUT_DATASET_NAME, "result",
    TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.OUTPUT_DATASET_NAMESPACE, outputDatasetNS.getNamespaceId().getNamespace(),
    TestSparkCrossNSDatasetApp.SparkCrossNSDatasetProgram.OUTPUT_DATASET_NAME, "finalDataset");
  sparkManager = spark2.getSparkManager("SparkCrossNSDatasetProgram").start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  // Verify the results written in outputDatasetNS by spark2
  datasetManager = getDataset(outputDatasetNS.getNamespaceId().dataset("finalDataset"));
  verifyDatasetResult(datasetManager);
}
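verifyDatasetResult is not shown either. Since the test sends the stream events "0" through "49" and both Spark programs propagate them into KeyValueTable datasets, a plausible version of the helper checks that an entry exists for each of the 50 keys. This is a hedged sketch, not the actual helper:

  // Hypothetical sketch of the verifyDatasetResult helper, assuming the Spark programs
  // write one entry per stream event (keys "0" through "49") into the KeyValueTable.
  private void verifyDatasetResult(DataSetManager<KeyValueTable> datasetManager) {
    KeyValueTable results = datasetManager.get();
    for (int i = 0; i < 50; i++) {
      Assert.assertNotNull(results.read(String.valueOf(i)));
    }
  }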
use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class TestFrameworkTestRun method testMapperDatasetAccess.
@Category(SlowTests.class)
@Test
public void testMapperDatasetAccess() throws Exception {
  addDatasetInstance("keyValueTable", "table1");
  addDatasetInstance("keyValueTable", "table2");
  DataSetManager<KeyValueTable> tableManager = getDataset("table1");
  KeyValueTable inputTable = tableManager.get();
  inputTable.write("hello", "world");
  tableManager.flush();
  ApplicationManager appManager = deployApplication(DatasetWithMRApp.class);
  Map<String, String> argsForMR = ImmutableMap.of(DatasetWithMRApp.INPUT_KEY, "table1",
                                                  DatasetWithMRApp.OUTPUT_KEY, "table2");
  MapReduceManager mrManager = appManager.getMapReduceManager(DatasetWithMRApp.MAPREDUCE_PROGRAM).start(argsForMR);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  appManager.stopAll();
  DataSetManager<KeyValueTable> outTableManager = getDataset("table2");
  verifyMapperJobOutput(DatasetWithMRApp.class, outTableManager);
}
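verifyMapperJobOutput is a helper of the test class that is not shown. Because the MapReduce job copies records from table1 to table2, a reasonable check is that the record written above ("hello" -> "world") shows up in the output table. A simplified sketch under that assumption:

  // Hypothetical sketch of the verifyMapperJobOutput helper, assuming the MapReduce job
  // simply copies the key/value pairs from the input table into the output table.
  // The appClass parameter is kept only to match the call above; this simplified check ignores it.
  private void verifyMapperJobOutput(Class<? extends Application> appClass,
                                     DataSetManager<KeyValueTable> outTableManager) {
    KeyValueTable outputTable = outTableManager.get();
    Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
  }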
use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class DynamicPartitioningTestRun method testDynamicPartitioningWithFailure.
@Test
public void testDynamicPartitioningWithFailure() throws Exception {
  // deploy the app
  ApplicationManager appManager = deployApplication(testSpace, AppWithDynamicPartitioning.class);
  // set up the inputs: keys "0".."2" mapped to values "a".."c"
  DataSetManager<KeyValueTable> dataSetManager = getDataset(testSpace.dataset("input"));
  KeyValueTable input = dataSetManager.get();
  for (int i = 0; i < 3; i++) {
    input.write(String.valueOf(i), String.valueOf((char) ('a' + i)));
  }
  dataSetManager.flush();
  // run the MR with one pfs
  testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1");
  // run the MR with two pfs
  testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1", "pfs2");
  // run the MR with two pfs in reverse order (because we don't want to rely on which one gets committed first)
  testDynamicPartitioningMRWithFailure(appManager, "pfs2", "pfs1", "pfs2");
}
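testDynamicPartitioningMRWithFailure is a private helper that is not shown. Its apparent contract is: run the MapReduce program of AppWithDynamicPartitioning so that writing to the first named PartitionedFileSet fails, then verify that none of the listed output datasets ended up with committed partitions. The sketch below is an assumption-heavy illustration; the program name "PartitionWriterMR" and the runtime-argument keys are invented, and only the overall shape follows from the calls above.

  // Hypothetical sketch of the testDynamicPartitioningMRWithFailure helper.
  private void testDynamicPartitioningMRWithFailure(ApplicationManager appManager,
                                                    String pfsToFail,
                                                    String... outputs) throws Exception {
    Map<String, String> args = new HashMap<>();
    args.put("outputs", String.join(",", outputs));   // assumed argument key
    args.put("fail.output", pfsToFail);               // assumed argument key
    MapReduceManager mrManager = appManager.getMapReduceManager("PartitionWriterMR").start(args);
    mrManager.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);
    // None of the outputs should have committed partitions after the failed run
    for (String output : outputs) {
      DataSetManager<PartitionedFileSet> pfsManager = getDataset(testSpace.dataset(output));
      Assert.assertTrue(pfsManager.get().getPartitions(null).isEmpty());
    }
  }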