use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
In the class ServiceLifeCycleTestRun, the method testContentProducerLifecycle:
@Test
public void testContentProducerLifecycle() throws Exception {
  try {
    ApplicationManager appManager = deployWithArtifact(ServiceLifecycleApp.class, artifactJar);
    // Set to have one thread only for testing context capture and release
    serviceManager = appManager.getServiceManager("test")
      .start(ImmutableMap.of(SystemArguments.SERVICE_THREADS, "1"));
    final DataSetManager<KeyValueTable> datasetManager = getDataset(ServiceLifecycleApp.HANDLER_TABLE_NAME);
    // Clean up the dataset first to avoid being affected by other tests
    datasetManager.get().delete(Bytes.toBytes("called"));
    datasetManager.get().delete(Bytes.toBytes("completed"));
    datasetManager.flush();
    // Start 5 concurrent downloads
    List<ListenableFuture<String>> completions = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
      completions.add(download(serviceManager));
    }
    // Make sure all producers have produced something
    Tasks.waitFor(true, () -> {
      byte[] value = datasetManager.get().read("called");
      datasetManager.flush();
      return value != null && value.length == Bytes.SIZEOF_LONG && Bytes.toLong(value) > 5;
    }, 10L, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    // Get the states; there should be 6 handler instances instantiated: 5 for the downloads and one for getState
    Multimap<Integer, String> states = getStates(serviceManager);
    Assert.assertEquals(6, states.size());
    // Set the complete flag in the dataset
    datasetManager.get().write("completed", Bytes.toBytes(true));
    datasetManager.flush();
    // Wait for the downloads to complete
    Futures.allAsList(completions).get(10L, TimeUnit.SECONDS);
    // Get the states again; they should still be the same 6 instances
    Assert.assertEquals(states, getStates(serviceManager));
  } finally {
    serviceManager.stop();
    serviceManager.waitForStopped(10, TimeUnit.SECONDS);
  }
}
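The download(...) and getStates(...) helpers are defined elsewhere in ServiceLifeCycleTestRun and are not shown above. A minimal, hedged sketch of what download(...) could look like, assuming the handler exposes a streaming endpoint named "download" (the endpoint path, executor handling, and helper shape are illustrative assumptions, not the project's actual code):

// Illustrative sketch only: asynchronously GET the (assumed) "download" endpoint of the
// service and return the response body once the content producer finishes streaming.
// Executor lifecycle management is omitted for brevity.
private ListenableFuture<String> download(ServiceManager serviceManager) {
  ListeningExecutorService executor =
    MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
  return executor.submit(() -> {
    URL url = new URL(serviceManager.getServiceURL(), "download");
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    try (InputStream is = connection.getInputStream()) {
      // Blocks until the handler's ContentProducer completes, i.e. after the test
      // writes the "completed" flag into the KeyValueTable.
      return new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8);
    } finally {
      connection.disconnect();
    }
  });
}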
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
In the class PipelineTest, the method testWordCountSparkSink:
@SuppressWarnings("ConstantConditions")
@Test
public void testWordCountSparkSink() throws Exception {
  String inputName = "sparkSinkInput";
  String outputName = "sparkSinkOutput";
  // create the pipeline config
  ETLStage source = new ETLStage("source", MockSource.getPlugin(inputName));
  Map<String, String> sinkProperties = new HashMap<>();
  sinkProperties.put("field", "text");
  sinkProperties.put("tableName", outputName);
  ETLStage sink = new ETLStage("sink", new ETLPlugin(WordCountSink.NAME, SparkSink.PLUGIN_TYPE, sinkProperties, null));
  ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(source)
    .addStage(sink)
    .addConnection(source.getName(), sink.getName())
    .build();
  // create the pipeline
  ApplicationId pipelineId = NamespaceId.DEFAULT.app("sparkSinkTestPipeline");
  ApplicationManager appManager = deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));
  // write the input
  Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
  DataSetManager<Table> inputManager = getDataset(inputName);
  List<StructuredRecord> inputRecords = new ArrayList<>();
  inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello World").build());
  inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Hal").build());
  inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Sam").build());
  MockSource.writeInput(inputManager, inputRecords);
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 4, TimeUnit.MINUTES);
  DataSetManager<KeyValueTable> outputManager = getDataset(outputName);
  KeyValueTable output = outputManager.get();
  Assert.assertEquals(3L, Bytes.toLong(output.read("Hello")));
  Assert.assertEquals(1L, Bytes.toLong(output.read("World")));
  Assert.assertEquals(2L, Bytes.toLong(output.read("my")));
  Assert.assertEquals(2L, Bytes.toLong(output.read("name")));
  Assert.assertEquals(2L, Bytes.toLong(output.read("is")));
  Assert.assertEquals(1L, Bytes.toLong(output.read("Hal")));
  Assert.assertEquals(1L, Bytes.toLong(output.read("Sam")));
}
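Besides point lookups with read(...), the whole output KeyValueTable can be inspected with a scan. A small optional sketch (not part of the test above) that could be appended at the end of the test to dump every word count:

// Optional debugging sketch: scan the entire word-count table and print each entry.
// Passing null for both the start and stop row means "scan everything".
try (CloseableIterator<KeyValue<byte[], byte[]>> scanner = output.scan(null, null)) {
  while (scanner.hasNext()) {
    KeyValue<byte[], byte[]> entry = scanner.next();
    System.out.println(Bytes.toString(entry.getKey()) + " -> " + Bytes.toLong(entry.getValue()));
  }
}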
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
In the class StandaloneDatasetDefinition, the method getDataset:
@Override
public StandaloneDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                    Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  return new StandaloneDataset(spec.getName(), table);
}
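StandaloneDataset itself is defined elsewhere in the project. A hedged sketch of how such a composite dataset typically wraps its embedded KeyValueTable (the put/get method names below are assumptions for illustration, not the actual class):

// Illustrative sketch of a composite dataset that delegates to an embedded KeyValueTable.
// The real StandaloneDataset may expose a different API; put/get here are assumed names.
public class StandaloneDataset extends AbstractDataset {

  private final KeyValueTable table;

  public StandaloneDataset(String instanceName, KeyValueTable table) {
    super(instanceName, table);
    this.table = table;
  }

  // Store a string value under the given key in the embedded table.
  public void put(String key, String value) {
    table.write(Bytes.toBytes(key), Bytes.toBytes(value));
  }

  // Read a value back, or return null if the key has never been written.
  public String get(String key) {
    byte[] value = table.read(Bytes.toBytes(key));
    return value == null ? null : Bytes.toString(value);
  }
}

The matching configure(...) in StandaloneDatasetDefinition would presumably register the embedded table under the name "objects", which is why getDataset(...) above looks it up via spec.getSpecification("objects").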
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
In the class TestFrameworkTestRun, the method executeWorkflow:
private String executeWorkflow(ApplicationManager applicationManager, Map<String, String> additionalParams,
                               int expectedComplete) throws Exception {
  WorkflowManager wfManager = applicationManager.getWorkflowManager(WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  Map<String, String> runtimeArgs = new HashMap<>();
  File waitFile = new File(TMP_FOLDER.newFolder(), "/wait.file");
  File doneFile = new File(TMP_FOLDER.newFolder(), "/done.file");
  runtimeArgs.put("input.path", "input");
  runtimeArgs.put("output.path", "output");
  runtimeArgs.put("wait.file", waitFile.getAbsolutePath());
  runtimeArgs.put("done.file", doneFile.getAbsolutePath());
  runtimeArgs.putAll(additionalParams);
  wfManager.start(runtimeArgs);
  // Wait until the custom action in the Workflow is triggered.
  while (!waitFile.exists()) {
    TimeUnit.MILLISECONDS.sleep(50);
  }
  // Now the Workflow should have RUNNING status. Get its run id.
  List<RunRecord> history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());
  String runId = history.get(0).getPid();
  // Get the local datasets for this Workflow run
  DataSetManager<KeyValueTable> localDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET + "." + runId));
  Assert.assertEquals("2", Bytes.toString(localDataset.get().read("text")));
  DataSetManager<FileSet> fileSetDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET + "." + runId));
  Assert.assertNotNull(fileSetDataset.get());
  // Local datasets should not exist at the namespace level
  localDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET));
  Assert.assertNull(localDataset.get());
  fileSetDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET));
  Assert.assertNull(fileSetDataset.get());
  // Verify that the workflow hasn't completed on its own before we signal it to
  history = wfManager.getHistory(ProgramRunStatus.RUNNING);
  Assert.assertEquals(1, history.size());
  // Signal the Workflow to continue
  doneFile.createNewFile();
  // Wait for the workflow to finish
  wfManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedComplete, 1, TimeUnit.MINUTES);
  Map<String, WorkflowNodeStateDetail> nodeStateDetailMap = wfManager.getWorkflowNodeStates(runId);
  Map<String, String> workflowMetricsContext = new HashMap<>();
  workflowMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  workflowMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  workflowMetricsContext.put(Constants.Metrics.Tag.WORKFLOW, WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
  workflowMetricsContext.put(Constants.Metrics.Tag.RUN_ID, runId);
  Map<String, String> writerContext = new HashMap<>(workflowMetricsContext);
  writerContext.put(Constants.Metrics.Tag.NODE, WorkflowAppWithLocalDatasets.LocalDatasetWriter.class.getSimpleName());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(writerContext, "user.num.lines"));
  Map<String, String> wfSparkMetricsContext = new HashMap<>(workflowMetricsContext);
  wfSparkMetricsContext.put(Constants.Metrics.Tag.NODE, "JavaSparkCSVToSpaceConverter");
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(wfSparkMetricsContext, "user.num.lines"));
  // check in the Spark context
  Map<String, String> sparkMetricsContext = new HashMap<>();
  sparkMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  sparkMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  sparkMetricsContext.put(Constants.Metrics.Tag.SPARK, "JavaSparkCSVToSpaceConverter");
  sparkMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("JavaSparkCSVToSpaceConverter").getRunId());
  Assert.assertEquals(2, getMetricsManager().getTotalMetric(sparkMetricsContext, "user.num.lines"));
  Map<String, String> appMetricsContext = new HashMap<>();
  appMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  appMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  // the app metrics context should have the sum of the custom action and Spark metrics
  Assert.assertEquals(4, getMetricsManager().getTotalMetric(appMetricsContext, "user.num.lines"));
  Map<String, String> wfMRMetricsContext = new HashMap<>(workflowMetricsContext);
  wfMRMetricsContext.put(Constants.Metrics.Tag.NODE, "WordCount");
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(wfMRMetricsContext, "user.num.words"));
  // MapReduce metrics context
  Map<String, String> mrMetricsContext = new HashMap<>();
  mrMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
  mrMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
  mrMetricsContext.put(Constants.Metrics.Tag.MAPREDUCE, "WordCount");
  mrMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("WordCount").getRunId());
  Assert.assertEquals(7, getMetricsManager().getTotalMetric(mrMetricsContext, "user.num.words"));
  final Map<String, String> readerContext = new HashMap<>(workflowMetricsContext);
  readerContext.put(Constants.Metrics.Tag.NODE, "readerAction");
  Tasks.waitFor(6L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      return getMetricsManager().getTotalMetric(readerContext, "user.unique.words");
    }
  }, 60, TimeUnit.SECONDS);
  return runId;
}
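A hypothetical call site for this helper, assuming WorkflowAppWithLocalDatasets is deployed into testSpace as in the rest of the test class (the argument values below are illustrative only):

// Illustrative only: deploy the app, run the workflow once, and expect one completed run.
ApplicationManager appManager = deployApplication(testSpace, WorkflowAppWithLocalDatasets.class);
String runId = executeWorkflow(appManager, Collections.emptyMap(), 1);
// runId can then be used for further run-scoped checks, e.g. inspecting workflow node states.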
use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
In the class TestFrameworkTestRun, the method testCrossNSMapperDatasetAccess:
@Category(SlowTests.class)
@Test
public void testCrossNSMapperDatasetAccess() throws Exception {
  NamespaceMeta inputNS = new NamespaceMeta.Builder().setName("inputNS").build();
  NamespaceMeta outputNS = new NamespaceMeta.Builder().setName("outputNS").build();
  getNamespaceAdmin().create(inputNS);
  getNamespaceAdmin().create(outputNS);
  addDatasetInstance(inputNS.getNamespaceId().dataset("table1"), "keyValueTable");
  addDatasetInstance(outputNS.getNamespaceId().dataset("table2"), "keyValueTable");
  DataSetManager<KeyValueTable> tableManager = getDataset(inputNS.getNamespaceId().dataset("table1"));
  KeyValueTable inputTable = tableManager.get();
  inputTable.write("hello", "world");
  tableManager.flush();
  ApplicationManager appManager = deployApplication(DatasetCrossNSAccessWithMAPApp.class);
  Map<String, String> argsForMR = ImmutableMap.of(
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS, inputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME, "table1",
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS, outputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME, "table2");
  MapReduceManager mrManager = appManager.getMapReduceManager(DatasetCrossNSAccessWithMAPApp.MAPREDUCE_PROGRAM).start(argsForMR);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  appManager.stopAll();
  DataSetManager<KeyValueTable> outTableManager = getDataset(outputNS.getNamespaceId().dataset("table2"));
  verifyMapperJobOutput(DatasetCrossNSAccessWithMAPApp.class, outTableManager);
}
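verifyMapperJobOutput(...) is a shared helper in the same test class and is not shown here. A hedged sketch of what it likely checks, given that the mapper copies the single record written to "table1" in inputNS over to "table2" in outputNS:

// Illustrative sketch: the cross-namespace MapReduce job copies key/value pairs verbatim,
// so the "hello" -> "world" entry written to the input table should appear in the output table.
// The appClass parameter is unused in this simplified sketch.
private void verifyMapperJobOutput(Class<?> appClass, DataSetManager<KeyValueTable> outTableManager) {
  KeyValueTable outputTable = outTableManager.get();
  Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
}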