Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class ObjectStoreDefinition, method getDataset:
@Override
public ObjectStoreDataset<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                        Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  TypeRepresentation typeRep = GSON.fromJson(spec.getProperty("type"), TypeRepresentation.class);
  Schema schema = GSON.fromJson(spec.getProperty("schema"), Schema.class);
  return new ObjectStoreDataset(spec.getName(), table, typeRep, schema, classLoader);
}
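For context, getDataset reads back the embedded "objects" table spec and the "type"/"schema" properties that the matching configure method stored at creation time. A minimal sketch of that configure counterpart, assuming the standard CDAP DatasetSpecification builder (the precondition checks are illustrative):

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
  // The "type" and "schema" properties are read back by getDataset above
  Preconditions.checkArgument(properties.getProperties().containsKey("type"));
  Preconditions.checkArgument(properties.getProperties().containsKey("schema"));
  return DatasetSpecification.builder(instanceName, getName())
    .properties(properties.getProperties())
    // Embed the KeyValueTable under the name "objects", matching spec.getSpecification("objects")
    .datasets(tableDef.configure("objects", properties))
    .build();
}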
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class TestDatasetDefinition, method getDataset:
@Override
public TestDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                              Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  DatasetSpecification kvTableSpec = spec.getSpecification("kv");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, DatasetDefinition.NO_ARGUMENTS, classLoader);
  return new TestDataset(spec, table, arguments);
}
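Note that the embedded table is opened with DatasetDefinition.NO_ARGUMENTS while the runtime arguments are handed to the wrapper instead. A simplified, hypothetical shape of such a wrapper dataset (the real test class may differ; the String accessors are illustrative):

public class TestDataset extends AbstractDataset {
  private final KeyValueTable table;
  private final Map<String, String> arguments;

  public TestDataset(DatasetSpecification spec, KeyValueTable table, Map<String, String> arguments) {
    super(spec.getName(), table);
    this.table = table;
    this.arguments = arguments;
  }

  // Convenience String accessors over the raw byte[] KeyValueTable API
  public void write(String key, String value) {
    table.write(Bytes.toBytes(key), Bytes.toBytes(value));
  }

  public String read(String key) {
    byte[] value = table.read(Bytes.toBytes(key));
    return value == null ? null : Bytes.toString(value);
  }
}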
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class TestDatasetModule, method register:
@Override
public void register(DatasetDefinitionRegistry registry) {
  DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = registry.get("keyValueTable");
  DatasetDefinition definition = new TestDatasetDefinition("testDataset", kvTableDef);
  registry.add(definition);
}
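Once registered, the custom type can be used like any built-in dataset. A hypothetical application snippet that deploys the module and creates an instance of the "testDataset" type (the application and instance names are illustrative):

public class MyTestApp extends AbstractApplication {
  @Override
  public void configure() {
    // Deploy the module so the "testDataset" type registered above becomes available
    addDatasetModule("test-dataset-module", TestDatasetModule.class);
    // Create a dataset instance of that type; it embeds a KeyValueTable named "kv"
    createDataset("myTestDataset", "testDataset", DatasetProperties.EMPTY);
  }
}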
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class WordCountTest, method testWordCount:
@Test
public void testWordCount() throws Exception {
  WordCount.WordCountConfig config = new WordCount.WordCountConfig("words", "stats", "counts", "unique", "assoc");
  // Deploy the application
  ApplicationManager appManager = deployApplication(WordCount.class, config);
  // Validate that the word-count table is empty and has no entry for "world"
  DataSetManager<KeyValueTable> datasetManager = getDataset(config.getWordCountTable());
  KeyValueTable wordCounts = datasetManager.get();
  Assert.assertNull(wordCounts.read("world"));
  // Start the flow
  FlowManager flowManager = appManager.getFlowManager("WordCounter").start();
  // Send a few events to the stream
  StreamManager streamManager = getStreamManager("words");
  streamManager.send("hello world");
  streamManager.send("a wonderful world");
  streamManager.send("the world says hello");
  // Wait for the events to be processed, but at most 5 seconds
  RuntimeMetrics metrics = flowManager.getFlowletMetrics("associator");
  metrics.waitForProcessed(3, 5, TimeUnit.SECONDS);
  // Start a new transaction so that the "wordCounts" dataset sees changes made by the flow
  datasetManager.flush();
  Assert.assertEquals(3L, Bytes.toLong(wordCounts.read("world")));
  // Start the RetrieveCounts service
  ServiceManager serviceManager = appManager.getServiceManager(RetrieveCounts.SERVICE_NAME).start();
  // Wait for the service to start up
  serviceManager.waitForStatus(true);
  // First verify the global statistics
  String response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "stats"));
  Map<String, String> map = new Gson().fromJson(response, STRING_MAP_TYPE);
  Assert.assertEquals("9", map.get("totalWords"));
  Assert.assertEquals("6", map.get("uniqueWords"));
  Assert.assertEquals(((double) 42) / 9, Double.valueOf(map.get("averageLength")), 0.001);
  // Now verify the statistics for a specific word
  response = requestService(new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS), "count/world"));
  Map<String, Object> omap = new Gson().fromJson(response, OBJECT_MAP_TYPE);
  Assert.assertEquals("world", omap.get("word"));
  Assert.assertEquals(3.0, omap.get("count"));
  // The associations are a map within the map
  @SuppressWarnings("unchecked") Map<String, Double> assocs = (Map<String, Double>) omap.get("assocs");
  // Assert the key is present before dereferencing its value
  Assert.assertTrue(assocs.containsKey("hello"));
  Assert.assertEquals(2.0, assocs.get("hello"), 0.000001);
}
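The test relies on a small requestService helper that is not shown in this snippet. A plausible implementation, assuming plain HttpURLConnection and Guava's CharStreams (the real test may use a shared HTTP utility instead):

private String requestService(URL url) throws IOException {
  HttpURLConnection conn = (HttpURLConnection) url.openConnection();
  try {
    // Fail fast if the service did not answer with 200 OK
    Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
    try (Reader reader = new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)) {
      return CharStreams.toString(reader);
    }
  } finally {
    conn.disconnect();
  }
}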
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
The class TestFrameworkTestRun, method executeWorkflow:
private String executeWorkflow(ApplicationManager applicationManager, Map<String, String> additionalParams,
                               int expectedComplete) throws Exception {
  WorkflowManager wfManager = applicationManager.getWorkflowManager(WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  Map<String, String> runtimeArgs = new HashMap<>();
  File waitFile = new File(TMP_FOLDER.newFolder(), "/wait.file");
  File doneFile = new File(TMP_FOLDER.newFolder(), "/done.file");
  runtimeArgs.put("input.path", "input");
  runtimeArgs.put("output.path", "output");
  runtimeArgs.put("wait.file", waitFile.getAbsolutePath());
  runtimeArgs.put("done.file", doneFile.getAbsolutePath());
  runtimeArgs.putAll(additionalParams);
  wfManager.start(runtimeArgs);
  // Wait until the custom action in the workflow is triggered
  while (!waitFile.exists()) {
    TimeUnit.MILLISECONDS.sleep(50);
  }
  // The workflow should now have RUNNING status; get its run id
  List<RunRecord> history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());
  String runId = history.get(0).getPid();
  // Get the local datasets for this workflow run
  DataSetManager<KeyValueTable> localDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET + "." + runId));
  Assert.assertEquals("2", Bytes.toString(localDataset.get().read("text")));
  DataSetManager<FileSet> fileSetDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET + "." + runId));
  Assert.assertNotNull(fileSetDataset.get());
  // Local datasets should not exist at the namespace level
  localDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET));
  Assert.assertNull(localDataset.get());
  fileSetDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET));
  Assert.assertNull(fileSetDataset.get());
  // Verify that the workflow hasn't completed on its own before we signal it to
  history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());
  // Signal the workflow to continue
  doneFile.createNewFile();
  // Wait for the workflow to finish
  wfManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedComplete, 1, TimeUnit.MINUTES);
  Map<String, WorkflowNodeStateDetail> nodeStateDetailMap = wfManager.getWorkflowNodeStates(runId);
  Map<String, String> workflowMetricsContext = new HashMap<>();
  workflowMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  workflowMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  workflowMetricsContext.put(Constants.Metrics.Tag.WORKFLOW, WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  workflowMetricsContext.put(Constants.Metrics.Tag.RUN_ID, runId);
  Map<String, String> writerContext = new HashMap<>(workflowMetricsContext);
  writerContext.put(Constants.Metrics.Tag.NODE,
                    WorkflowAppWithLocalDatasets.LocalDatasetWriter.class.getSimpleName());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(writerContext, "user.num.lines"));
  Map<String, String> wfSparkMetricsContext = new HashMap<>(workflowMetricsContext);
  wfSparkMetricsContext.put(Constants.Metrics.Tag.NODE, "JavaSparkCSVToSpaceConverter");
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(wfSparkMetricsContext, "user.num.lines"));
  // Check the same metric in the Spark program's own context
  Map<String, String> sparkMetricsContext = new HashMap<>();
  sparkMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  sparkMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  sparkMetricsContext.put(Constants.Metrics.Tag.SPARK, "JavaSparkCSVToSpaceConverter");
  sparkMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("JavaSparkCSVToSpaceConverter").getRunId());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(sparkMetricsContext, "user.num.lines"));
  Map<String, String> appMetricsContext = new HashMap<>();
  appMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  appMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  // The app-level metrics context should hold the sum of the custom action and Spark metrics
  Assert.assertEquals(4, getMetricsManager().getTotalMetric(appMetricsContext, "user.num.lines"));
  Map<String, String> wfMRMetricsContext = new HashMap<>(workflowMetricsContext);
  wfMRMetricsContext.put(Constants.Metrics.Tag.NODE, "WordCount");
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(wfMRMetricsContext, "user.num.words"));
  // MapReduce metrics context
  Map<String, String> mrMetricsContext = new HashMap<>();
  mrMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  mrMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  mrMetricsContext.put(Constants.Metrics.Tag.MAPREDUCE, "WordCount");
  mrMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("WordCount").getRunId());
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(mrMetricsContext, "user.num.words"));
  final Map<String, String> readerContext = new HashMap<>(workflowMetricsContext);
  readerContext.put(Constants.Metrics.Tag.NODE, "readerAction");
  Tasks.waitFor(6L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      return getMetricsManager().getTotalMetric(readerContext, "user.unique.words");
    }
  }, 60, TimeUnit.SECONDS);
  return runId;
}
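A hypothetical call site for this helper, assuming the app under test is deployed into testSpace first (the empty argument map and expected run count are illustrative):

ApplicationManager applicationManager =
  deployApplication(testSpace, WorkflowAppWithLocalDatasets.class);
// Run once with no extra runtime arguments and expect a single COMPLETED run
String runId = executeWorkflow(applicationManager, Collections.<String, String>emptyMap(), 1);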