Use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by cdapio.
From the class SparkTest, method testSparkWithGetDataset.
/**
 * Deploys the given application, runs the named Spark program until a COMPLETED run is seen, hands the
 * "logStats" table to {@code validateGetDatasetOutput}, and finally removes the input location and every
 * row of the "logStats" table.
 *
 * @param appClass the application class to deploy
 * @param sparkProgram the name of the Spark program to look up on the deployed application
 * @throws Exception if deployment, input preparation, the program run, validation or cleanup fails
 */
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager appManager = deploy(appClass);

  // Runtime arguments: the input path "nn" is scoped to the "logs" dataset; "input" and "output" carry
  // the dataset names handed to the program.
  Map<String, String> inputPathArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputPathArgs, "nn");
  Map<String, String> runtimeArgs = new HashMap<>(RuntimeArguments.addScope(Scope.DATASET, "logs", inputPathArgs));
  runtimeArgs.put("input", "logs");
  runtimeArgs.put("output", "logStats");

  // Populate the location "nn" of the "logs" file set with input data.
  DataSetManager<FileSet> logsManager = getDataset("logs");
  Location inputLocation = logsManager.get().getLocation("nn");
  prepareInputFileSetWithLogData(inputLocation);

  // Run the program and wait for a COMPLETED run.
  SparkManager spark = appManager.getSparkManager(sparkProgram);
  spark.startAndWaitForGoodRun(runtimeArgs, ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  // Validate what the program wrote.
  DataSetManager<KeyValueTable> statsManager = getDataset("logStats");
  KeyValueTable statsTable = statsManager.get();
  validateGetDatasetOutput(statsTable);

  // Cleanup after run: drop the input location, then delete every row of the output table.
  inputLocation.delete(true);
  statsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> rows = statsTable.scan(null, null)) {
    while (rows.hasNext()) {
      KeyValue<byte[], byte[]> row = rows.next();
      statsTable.delete(row.getKey());
    }
  }
  statsManager.flush();
}
Use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by cdapio.
From the class TestFrameworkTestRun, method executeWorkflow.
/**
 * Starts the {@link WorkflowAppWithLocalDatasets} workflow, inspects its run-scoped local datasets while
 * the run is paused, lets the run continue, and then verifies the emitted metrics.
 *
 * <p>The workflow is handed the paths of two files through the "wait.file" and "done.file" runtime
 * arguments. This method waits for the first file to appear and creates the second one to signal the
 * workflow to continue.
 *
 * @param applicationManager manager of the deployed application that contains the workflow
 * @param additionalParams extra runtime arguments; applied last, so they override the defaults set here
 * @param expectedComplete number of COMPLETED workflow runs to wait for after signalling
 * @return the run id of the workflow run started by this call
 * @throws Exception if the workflow does not reach the expected states in time or an assertion fails
 */
private String executeWorkflow(ApplicationManager applicationManager, Map<String, String> additionalParams,
                               int expectedComplete) throws Exception {
  WorkflowManager wfManager = applicationManager.getWorkflowManager(WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  Map<String, String> runtimeArgs = new HashMap<>();
  final File waitFile = new File(TMP_FOLDER.newFolder(), "/wait.file");
  File doneFile = new File(TMP_FOLDER.newFolder(), "/done.file");
  runtimeArgs.put("input.path", "input");
  runtimeArgs.put("output.path", "output");
  runtimeArgs.put("wait.file", waitFile.getAbsolutePath());
  runtimeArgs.put("done.file", doneFile.getAbsolutePath());
  runtimeArgs.putAll(additionalParams);
  wfManager.start(runtimeArgs);

  // Wait until custom action in the Workflow is triggered. The wait is bounded so that a workflow that
  // fails before reaching the action makes the test fail with a timeout instead of hanging forever.
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return waitFile.exists();
    }
  }, 2, TimeUnit.MINUTES);

  // Now the Workflow should have RUNNING status. Get its runid.
  List<RunRecord> history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());
  String runId = history.get(0).getPid();

  // Get the local datasets for this Workflow run
  DataSetManager<KeyValueTable> localDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET + "." + runId));
  Assert.assertEquals("2", Bytes.toString(localDataset.get().read("text")));
  DataSetManager<FileSet> fileSetDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET + "." + runId));
  Assert.assertNotNull(fileSetDataset.get());

  // Local datasets should not exist at the namespace level
  localDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET));
  Assert.assertNull(localDataset.get());
  fileSetDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET));
  Assert.assertNull(fileSetDataset.get());

  // Verify that the workflow hasn't completed on its own before we signal it to continue
  history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());

  // Signal the Workflow to continue. The file lives in a fresh temporary folder, so creation must succeed.
  Assert.assertTrue("Failed to create the done file " + doneFile, doneFile.createNewFile());

  // Wait for workflow to finish
  wfManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedComplete, 1, TimeUnit.MINUTES);
  Map<String, WorkflowNodeStateDetail> nodeStateDetailMap = wfManager.getWorkflowNodeStates(runId);

  // Tags shared by all metrics queried at the workflow-run level
  Map<String, String> workflowMetricsContext = new HashMap<>();
  workflowMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  workflowMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  workflowMetricsContext.put(Constants.Metrics.Tag.WORKFLOW, WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  workflowMetricsContext.put(Constants.Metrics.Tag.RUN_ID, runId);

  // check the writer action node in the workflow context
  Map<String, String> writerContext = new HashMap<>(workflowMetricsContext);
  writerContext.put(Constants.Metrics.Tag.NODE,
                    WorkflowAppWithLocalDatasets.LocalDatasetWriter.class.getSimpleName());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(writerContext, "user.num.lines"));

  // check the spark node in the workflow context
  Map<String, String> wfSparkMetricsContext = new HashMap<>(workflowMetricsContext);
  wfSparkMetricsContext.put(Constants.Metrics.Tag.NODE, "JavaSparkCSVToSpaceConverter");
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(wfSparkMetricsContext, "user.num.lines"));

  // check in spark context
  Map<String, String> sparkMetricsContext = new HashMap<>();
  sparkMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  sparkMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  sparkMetricsContext.put(Constants.Metrics.Tag.SPARK, "JavaSparkCSVToSpaceConverter");
  sparkMetricsContext.put(Constants.Metrics.Tag.RUN_ID,
                          nodeStateDetailMap.get("JavaSparkCSVToSpaceConverter").getRunId());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(sparkMetricsContext, "user.num.lines"));

  Map<String, String> appMetricsContext = new HashMap<>();
  appMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  appMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  // app metrics context should have sum from custom action and spark metrics.
  Assert.assertEquals(4, getMetricsManager().getTotalMetric(appMetricsContext, "user.num.lines"));

  // check the "WordCount" node in the workflow context
  Map<String, String> wfMRMetricsContext = new HashMap<>(workflowMetricsContext);
  wfMRMetricsContext.put(Constants.Metrics.Tag.NODE, "WordCount");
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(wfMRMetricsContext, "user.num.words"));

  // mr metrics context
  Map<String, String> mrMetricsContext = new HashMap<>();
  mrMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  mrMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  mrMetricsContext.put(Constants.Metrics.Tag.MAPREDUCE, "WordCount");
  mrMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("WordCount").getRunId());
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(mrMetricsContext, "user.num.words"));

  // The reader action metric is polled until it reaches the expected total or the wait times out.
  final Map<String, String> readerContext = new HashMap<>(workflowMetricsContext);
  readerContext.put(Constants.Metrics.Tag.NODE, "readerAction");
  Tasks.waitFor(6L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      return getMetricsManager().getTotalMetric(readerContext, "user.unique.words");
    }
  }, 60, TimeUnit.SECONDS);
  return runId;
}
Use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by cdapio.
From the class Spark2Test, method testScalaSparkCrossNSStream.
/**
 * Runs {@link ScalaCrossNSProgram} from the default namespace with an input file set that lives in one
 * namespace and an output key-value table that lives in another, then verifies the output table.
 *
 * <p>The input file contains the numbers 0 to 49, one per line. After the run, the output table is
 * expected to hold, for each number, an entry whose key and value are both the UTF-8 bytes of its
 * decimal string.
 *
 * @throws Exception if setup, the program run or verification fails
 */
@Test
public void testScalaSparkCrossNSStream() throws Exception {
  // create a namespace for input and create a file set instance
  NamespaceMeta inputNSMeta = new NamespaceMeta.Builder().setName("inputSpaceForSpark").build();
  getNamespaceAdmin().create(inputNSMeta);
  DatasetId inputDatasetId = inputNSMeta.getNamespaceId().dataset("input");
  addDatasetInstance(FileSet.class.getName(), inputDatasetId,
                     FileSetProperties.builder().setInputFormat(TextInputFormat.class).build());

  // create a namespace for dataset and add the dataset instance in it
  NamespaceMeta outputNSMeta = new NamespaceMeta.Builder().setName("crossNSDataset").build();
  getNamespaceAdmin().create(outputNSMeta);
  addDatasetInstance(outputNSMeta.getNamespaceId().dataset("count"), "keyValueTable");

  // write something to the input dataset
  Location inputFile = this.<FileSet>getDataset(inputDatasetId).get().getLocation("inputFile");
  try (PrintStream printer = new PrintStream(inputFile.getOutputStream(), true, "UTF-8")) {
    for (int i = 0; i < 50; i++) {
      printer.println(String.valueOf(i));
    }
  }

  // deploy the spark app in another namespace (default)
  ApplicationManager applicationManager = deploy(NamespaceId.DEFAULT, SparkAppUsingObjectStore.class);
  Map<String, String> args = new HashMap<>();
  args.put(ScalaCrossNSProgram.INPUT_NAMESPACE(), inputNSMeta.getNamespaceId().getNamespace());
  args.put(ScalaCrossNSProgram.OUTPUT_NAMESPACE(), outputNSMeta.getNamespaceId().getNamespace());
  args.put(ScalaCrossNSProgram.OUTPUT_NAME(), "count");
  FileSetArguments.setInputPath(args, "inputFile");
  SparkManager sparkManager = applicationManager.getSparkManager(ScalaCrossNSProgram.class.getSimpleName())
    .start(args);

  // Wait directly for a COMPLETED run. Waiting for RUNNING first is racy (a run that starts slowly or
  // finishes between polls would time out), and waiting only for "stopped" would not tell a successful
  // run from a failed one. The timeout keeps the overall budget of the two waits this replaces.
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 70, TimeUnit.SECONDS);

  // get the dataset from the other namespace where we expect it to exist and compare the data
  DataSetManager<KeyValueTable> countManager = getDataset(outputNSMeta.getNamespaceId().dataset("count"));
  KeyValueTable results = countManager.get();
  for (int i = 0; i < 50; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertArrayEquals(key, results.read(key));
  }
}
Use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by cdapio.
From the class AdminAppTestRun, method testAdminService.
/**
 * Exercises the HTTP endpoints of the {@link AdminApp} service: dataset existence, creation, type lookup,
 * property update and retrieval, truncation and deletion, followed by the namespace and plugin-artifact
 * endpoints.
 *
 * <p>Dataset "nn" is created and deleted during the test, while "xx" is never created and is used for
 * the not-found cases. Namespace "x" is created for the namespace and plugin checks and is removed again
 * in the {@code finally} block.
 *
 * @throws Exception if a request cannot be executed or an assertion fails
 */
@Test
public void testAdminService() throws Exception {
  // Start the service
  ServiceManager serviceManager = appManager.getServiceManager(AdminApp.SERVICE_NAME).start();
  String namespaceX = "x";
  try {
    URI serviceURI = serviceManager.getServiceURL(10, TimeUnit.SECONDS).toURI();

    // dataset nn should not exist
    HttpResponse response = executeHttp(HttpRequest.get(serviceURI.resolve("exists/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());
    Assert.assertEquals("false", response.getResponseBodyAsString());

    // create nn as a table
    response = executeHttp(HttpRequest.put(serviceURI.resolve("create/nn/table").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());

    // now nn should exist
    response = executeHttp(HttpRequest.get(serviceURI.resolve("exists/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());
    Assert.assertEquals("true", response.getResponseBodyAsString());

    // create it again as a fileset -> should fail with conflict
    response = executeHttp(HttpRequest.put(serviceURI.resolve("create/nn/fileSet").toURL()).build());
    Assert.assertEquals(409, response.getResponseCode());

    // get the type for xx -> not found
    response = executeHttp(HttpRequest.get(serviceURI.resolve("type/xx").toURL()).build());
    Assert.assertEquals(404, response.getResponseCode());

    // get the type for nn -> table
    response = executeHttp(HttpRequest.get(serviceURI.resolve("type/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());
    Assert.assertEquals("table", response.getResponseBodyAsString());

    // update xx's properties -> should get not-found
    Map<String, String> nnProps = TableProperties.builder().setTTL(1000L).build().getProperties();
    response = executeHttp(HttpRequest.put(serviceURI.resolve("update/xx").toURL())
                             .withBody(GSON.toJson(nnProps)).build());
    Assert.assertEquals(404, response.getResponseCode());

    // update nn's properties
    response = executeHttp(HttpRequest.put(serviceURI.resolve("update/nn").toURL())
                             .withBody(GSON.toJson(nnProps)).build());
    Assert.assertEquals(200, response.getResponseCode());

    // get properties for xx -> not found
    response = executeHttp(HttpRequest.get(serviceURI.resolve("props/xx").toURL()).build());
    Assert.assertEquals(404, response.getResponseCode());

    // get properties for nn and validate
    response = executeHttp(HttpRequest.get(serviceURI.resolve("props/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());
    Map<String, String> returnedProps = GSON.fromJson(response.getResponseBodyAsString(),
                                                      new TypeToken<Map<String, String>>() { }.getType());
    Assert.assertEquals(nnProps, returnedProps);

    // write some data to the table
    DataSetManager<Table> nnManager = getDataset("nn");
    nnManager.get().put(new Put("x", "y", "z"));
    nnManager.flush();

    // in a new tx, validate that data is in table
    Assert.assertFalse(nnManager.get().get(new Get("x")).isEmpty());
    Assert.assertEquals("z", nnManager.get().get(new Get("x", "y")).getString("y"));
    nnManager.flush();

    // truncate xx -> not found
    response = executeHttp(HttpRequest.post(serviceURI.resolve("truncate/xx").toURL()).build());
    Assert.assertEquals(404, response.getResponseCode());

    // truncate nn
    response = executeHttp(HttpRequest.post(serviceURI.resolve("truncate/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());

    // validate table is empty
    Assert.assertTrue(nnManager.get().get(new Get("x")).isEmpty());
    nnManager.flush();

    // delete nn
    response = executeHttp(HttpRequest.delete(serviceURI.resolve("delete/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());

    // delete again -> not found
    response = executeHttp(HttpRequest.delete(serviceURI.resolve("delete/nn").toURL()).build());
    Assert.assertEquals(404, response.getResponseCode());

    // delete xx which never existed -> not found
    response = executeHttp(HttpRequest.delete(serviceURI.resolve("delete/xx").toURL()).build());
    Assert.assertEquals(404, response.getResponseCode());

    // exists should now return false for nn
    response = executeHttp(HttpRequest.get(serviceURI.resolve("exists/nn").toURL()).build());
    Assert.assertEquals(200, response.getResponseCode());
    Assert.assertEquals("false", response.getResponseBodyAsString());
    Assert.assertNull(getDataset("nn").get());

    // test Admin.namespaceExists()
    HttpRequest request = HttpRequest.get(serviceURI.resolve("namespaces/y").toURL()).build();
    response = executeHttp(request);
    Assert.assertEquals(404, response.getResponseCode());

    // test Admin.getNamespaceSummary()
    NamespaceMeta namespaceXMeta = new NamespaceMeta.Builder().setName(namespaceX).setGeneration(10L).build();
    getNamespaceAdmin().create(namespaceXMeta);
    request = HttpRequest.get(serviceURI.resolve("namespaces/" + namespaceX).toURL()).build();
    response = executeHttp(request);
    // Check the status before parsing so that a failed request is reported as such, not as a JSON mismatch.
    Assert.assertEquals(200, response.getResponseCode());
    NamespaceSummary namespaceSummary = GSON.fromJson(response.getResponseBodyAsString(), NamespaceSummary.class);
    NamespaceSummary expectedX = new NamespaceSummary(namespaceXMeta.getName(), namespaceXMeta.getDescription(),
                                                      namespaceXMeta.getGeneration());
    Assert.assertEquals(expectedX, namespaceSummary);

    // test ArtifactManager.listArtifacts()
    ArtifactId pluginArtifactId = new NamespaceId(namespaceX).artifact("r1", "1.0.0");
    // add a plugin artifact to namespace X
    addPluginArtifact(pluginArtifactId, ADMIN_APP_ARTIFACT, DummyPlugin.class);

    // no plugins should be listed in the default namespace, but the app artifact should
    request = HttpRequest.get(serviceURI.resolve("namespaces/default/plugins").toURL()).build();
    response = executeHttp(request);
    Assert.assertEquals(200, response.getResponseCode());
    Type setType = new TypeToken<Set<ArtifactSummary>>() { }.getType();
    Assert.assertEquals(Collections.singleton(ADMIN_ARTIFACT_SUMMARY),
                        GSON.fromJson(response.getResponseBodyAsString(), setType));

    // the plugin should be listed in namespace X
    request = HttpRequest.get(serviceURI.resolve("namespaces/" + namespaceX + "/plugins").toURL()).build();
    response = executeHttp(request);
    Assert.assertEquals(200, response.getResponseCode());
    ArtifactSummary expected = new ArtifactSummary(pluginArtifactId.getArtifact(), pluginArtifactId.getVersion());
    Assert.assertEquals(Collections.singleton(expected),
                        GSON.fromJson(response.getResponseBodyAsString(), setType));
  } finally {
    try {
      serviceManager.stop();
    } finally {
      // Remove namespace X even if stopping the service throws, so that it cannot leak into other tests.
      NamespaceId namespaceXId = new NamespaceId(namespaceX);
      if (getNamespaceAdmin().exists(namespaceXId)) {
        getNamespaceAdmin().delete(namespaceXId);
      }
    }
  }
}
Use of io.cdap.cdap.api.dataset.lib.FileSet in project cdap by cdapio.
From the class AdminAppTestRun, method testAdminProgram.
/**
 * Runs the given program twice and checks its effect on datasets "a" through "d".
 *
 * <p>Before the first run, fileset "b" and tables "c" and "d" are created, and a row is written to "c".
 * After the first run, "a" is expected to exist and be empty, "b" to keep its input format but have a
 * base path ending in "/extra/", "c" to be empty, and "d" to be gone. The second run is started with
 * "dropAll" set to "true", after which none of the four datasets is expected to exist.
 *
 * @param manager manager of the program to run
 * @param <T> the concrete program manager type
 * @throws Exception if dataset setup, a program run or an assertion fails
 */
private <T extends ProgramManager<T>> void testAdminProgram(ProgramManager<T> manager) throws Exception {
  // Fileset "b": the first run is expected to update it.
  addDatasetInstance(
    FileSet.class.getName(), "b",
    FileSetProperties.builder().setBasePath("some/path").setInputFormat(TextInputFormat.class).build());
  DataSetManager<FileSet> filesetB = getDataset("b");
  String originalFormat = filesetB.get().getInputFormatClassName();
  String originalPath = filesetB.get().getBaseLocation().toURI().getPath();
  Assert.assertTrue(originalPath.endsWith("some/path/"));
  filesetB.flush();

  // Table "c": holds one row that the first run is expected to truncate away.
  addDatasetInstance("table", "c");
  DataSetManager<Table> tableC = getDataset("c");
  Put row = new Put("x", "y", "z");
  tableC.get().put(row);
  tableC.flush();

  // Table "d": the first run is expected to drop it.
  addDatasetInstance("table", "d");

  // First run: hand over a base path that does not exist yet and wait for completion.
  File extraDir = new File(TMP_FOLDER.newFolder(), "extra");
  Assert.assertFalse(extraDir.exists());
  manager.start(ImmutableMap.of("new.base.path", extraDir.getPath()));
  manager.waitForRun(ProgramRunStatus.COMPLETED, 30, TimeUnit.SECONDS);

  // Dataset "a" must now exist and contain no rows.
  DataSetManager<Table> tableA = getDataset("a");
  Assert.assertNull(tableA.get().scan(null, null).next());
  tableA.flush();

  // Fileset "b" (fresh instance): same input format, base path moved to the new directory.
  filesetB = getDataset("b");
  Assert.assertEquals(originalFormat, filesetB.get().getInputFormatClassName());
  String updatedPath = filesetB.get().getBaseLocation().toURI().getPath();
  Assert.assertTrue(updatedPath.endsWith("/extra/"));
  // The directory itself must have been created by the fileset update.
  Assert.assertTrue(extraDir.exists());
  filesetB.flush();

  // Table "c" must be empty.
  Assert.assertNull(tableC.get().scan(null, null).next());
  tableC.flush();

  // Table "d" must be gone.
  Assert.assertNull(getDataset("d").get());

  // Second run: drop everything, then check that no dataset is left.
  manager.start(ImmutableMap.of("dropAll", "true"));
  manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 30, TimeUnit.SECONDS);
  for (String name : new String[] {"a", "b", "c", "d"}) {
    Assert.assertNull(getDataset(name).get());
  }
}
Aggregations