use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class Spark2Test method testScalaSparkWithObjectStore.
@Test
public void testScalaSparkWithObjectStore() throws Exception {
ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
prepareInputData(keysManager);
SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start();
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
sparkManager.waitForStopped(60, TimeUnit.SECONDS);
DataSetManager<KeyValueTable> countManager = getDataset("count");
checkOutputData(countManager);
}
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class Spark2Test method testSparkWithObjectStore.
@Test
public void testSparkWithObjectStore() throws Exception {
ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
prepareInputData(keysManager);
SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
sparkManager.waitForStopped(60, TimeUnit.SECONDS);
DataSetManager<KeyValueTable> countManager = getDataset("count");
checkOutputData(countManager);
// validate that the table emitted metrics
// one read + one write in beforeSubmit(), increment (= read + write) in main -> 4
Tasks.waitFor(4L, new Callable<Long>() {
@Override
public Long call() throws Exception {
Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE, "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM, ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(), Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(), Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(), Constants.Metrics.Tag.DATASET, "totals"), Collections.<String>emptyList()));
if (metrics.isEmpty()) {
return 0L;
}
Assert.assertEquals(1, metrics.size());
MetricTimeSeries ts = metrics.iterator().next();
Assert.assertEquals(1, ts.getTimeValues().size());
return ts.getTimeValues().get(0).getValue();
}
}, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class Spark2Test method testScalaSparkCrossNSDataset.
@Test
public void testScalaSparkCrossNSDataset() throws Exception {
// Deploy and create a dataset in namespace datasetSpaceForSpark
NamespaceMeta inputDSNSMeta = new NamespaceMeta.Builder().setName("datasetSpaceForSpark").build();
getNamespaceAdmin().create(inputDSNSMeta);
deploy(inputDSNSMeta.getNamespaceId(), SparkAppUsingObjectStore.class);
DataSetManager<ObjectStore<String>> keysManager = getDataset(inputDSNSMeta.getNamespaceId().dataset("keys"));
prepareInputData(keysManager);
Map<String, String> args = ImmutableMap.of(ScalaCharCountProgram.INPUT_DATASET_NAMESPACE(), inputDSNSMeta.getNamespaceId().getNamespace(), ScalaCharCountProgram.INPUT_DATASET_NAME(), "keys");
ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
SparkManager sparkManager = applicationManager.getSparkManager(ScalaCharCountProgram.class.getSimpleName()).start(args);
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
sparkManager.waitForStopped(60, TimeUnit.SECONDS);
DataSetManager<KeyValueTable> countManager = getDataset("count");
checkOutputData(countManager);
}
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class Spark2Test method testScalaSparkCrossNSStream.
@Test
public void testScalaSparkCrossNSStream() throws Exception {
// create a namespace for input and create a file set instance
NamespaceMeta inputNSMeta = new NamespaceMeta.Builder().setName("inputSpaceForSpark").build();
getNamespaceAdmin().create(inputNSMeta);
DatasetId inputDatasetId = inputNSMeta.getNamespaceId().dataset("input");
addDatasetInstance(FileSet.class.getName(), inputDatasetId, FileSetProperties.builder().setInputFormat(TextInputFormat.class).build());
// create a namespace for dataset and add the dataset instance in it
NamespaceMeta outputNSMeta = new NamespaceMeta.Builder().setName("crossNSDataset").build();
getNamespaceAdmin().create(outputNSMeta);
addDatasetInstance(outputNSMeta.getNamespaceId().dataset("count"), "keyValueTable");
// write something to the input dataset
Location inputFile = this.<FileSet>getDataset(inputDatasetId).get().getLocation("inputFile");
try (PrintStream printer = new PrintStream(inputFile.getOutputStream(), true, "UTF-8")) {
for (int i = 0; i < 50; i++) {
printer.println(String.valueOf(i));
}
}
// deploy the spark app in another namespace (default)
ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
Map<String, String> args = new HashMap<>();
args.put(ScalaCrossNSProgram.INPUT_NAMESPACE(), inputNSMeta.getNamespaceId().getNamespace());
args.put(ScalaCrossNSProgram.OUTPUT_NAMESPACE(), outputNSMeta.getNamespaceId().getNamespace());
args.put(ScalaCrossNSProgram.OUTPUT_NAME(), "count");
FileSetArguments.setInputPath(args, "inputFile");
SparkManager sparkManager = applicationManager.getSparkManager(ScalaCrossNSProgram.class.getSimpleName()).start(args);
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
sparkManager.waitForStopped(60, TimeUnit.SECONDS);
// get the dataset from the other namespace where we expect it to exist and compare the data
DataSetManager<KeyValueTable> countManager = getDataset(outputNSMeta.getNamespaceId().dataset("count"));
KeyValueTable results = countManager.get();
for (int i = 0; i < 50; i++) {
byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
Assert.assertArrayEquals(key, results.read(key));
}
}
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
the class Spark2Test method testSparkWithLocalFiles.
private void testSparkWithLocalFiles(Class<? extends Application> appClass, String sparkProgram, String prefix) throws Exception {
ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, appClass);
URI localFile = createLocalPropertiesFile(prefix);
SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(Collections.singletonMap(SparkAppUsingLocalFiles.LOCAL_FILE_RUNTIME_ARG, localFile.toString()));
sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
sparkManager.waitForStopped(120, TimeUnit.SECONDS);
DataSetManager<KeyValueTable> kvTableManager = getDataset(SparkAppUsingLocalFiles.OUTPUT_DATASET_NAME);
KeyValueTable kvTable = kvTableManager.get();
Map<String, String> expected = ImmutableMap.of("a", "1", "b", "2", "c", "3");
List<byte[]> deleteKeys = new ArrayList<>();
try (CloseableIterator<KeyValue<byte[], byte[]>> scan = kvTable.scan(null, null)) {
for (int i = 0; i < 3; i++) {
KeyValue<byte[], byte[]> next = scan.next();
Assert.assertEquals(expected.get(Bytes.toString(next.getKey())), Bytes.toString(next.getValue()));
deleteKeys.add(next.getKey());
}
Assert.assertFalse(scan.hasNext());
}
// Cleanup after run
kvTableManager.flush();
for (byte[] key : deleteKeys) {
kvTable.delete(key);
}
kvTableManager.flush();
}
Aggregations