
Example 76 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class ServiceLifeCycleTestRun method testContentProducerLifecycle.

@Test
public void testContentProducerLifecycle() throws Exception {
    try {
        ApplicationManager appManager = deployWithArtifact(ServiceLifecycleApp.class, artifactJar);
        // Use only one thread so that context capture and release can be tested
        serviceManager = appManager.getServiceManager("test").start(ImmutableMap.of(SystemArguments.SERVICE_THREADS, "1"));
        final DataSetManager<KeyValueTable> datasetManager = getDataset(ServiceLifecycleApp.HANDLER_TABLE_NAME);
        // Clean up the dataset first to avoid being affected by other tests
        datasetManager.get().delete(Bytes.toBytes("called"));
        datasetManager.get().delete(Bytes.toBytes("completed"));
        datasetManager.flush();
        // Start 5 concurrent downloads
        List<ListenableFuture<String>> completions = new ArrayList<>();
        for (int i = 0; i < 5; i++) {
            completions.add(download(serviceManager));
        }
        // Make sure all producers have produced something
        Tasks.waitFor(true, () -> {
            byte[] value = datasetManager.get().read("called");
            datasetManager.flush();
            return value != null && value.length == Bytes.SIZEOF_LONG && Bytes.toLong(value) > 5;
        }, 10L, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
        // Get the states; there should be 6 handler instances instantiated: 5 for the downloads, one for getStates
        Multimap<Integer, String> states = getStates(serviceManager);
        Assert.assertEquals(6, states.size());
        // Set the complete flag in the dataset
        datasetManager.get().write("completed", Bytes.toBytes(true));
        datasetManager.flush();
        // Wait for all downloads to complete
        Futures.allAsList(completions).get(10L, TimeUnit.SECONDS);
        // Get the states again; they should still be the same 6 instances
        Assert.assertEquals(states, getStates(serviceManager));
    } finally {
        serviceManager.stop();
        serviceManager.waitForStopped(10, TimeUnit.SECONDS);
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ArrayList(java.util.ArrayList) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Test(org.junit.Test)
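
The download(...) and getStates(...) helpers are defined elsewhere in ServiceLifeCycleTestRun and are not shown here. As a rough sketch only, a download helper of this shape could submit the HTTP call to a listening executor and return the response body as a ListenableFuture; the "download" endpoint name, the executor handling, and the Guava utilities are assumptions, not the actual implementation:

private ListenableFuture<String> download(ServiceManager serviceManager) {
    // Assumption: Guava's MoreExecutors/ByteStreams; a real test would reuse one shared executor
    ListeningExecutorService executor = MoreExecutors.listeningDecorator(Executors.newCachedThreadPool());
    return executor.submit(() -> {
        // Hypothetical endpoint name; the real handler path may differ
        URL url = new URL(serviceManager.getServiceURL(), "download");
        try (InputStream is = url.openStream()) {
            return new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8);
        }
    });
}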

Example 77 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class PipelineTest method testWordCountSparkSink.

@SuppressWarnings("ConstantConditions")
@Test
public void testWordCountSparkSink() throws Exception {
    String inputName = "sparkSinkInput";
    String outputName = "sparkSinkOutput";
    // create the pipeline config
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputName));
    Map<String, String> sinkProperties = new HashMap<>();
    sinkProperties.put("field", "text");
    sinkProperties.put("tableName", outputName);
    ETLStage sink = new ETLStage("sink", new ETLPlugin(WordCountSink.NAME, SparkSink.PLUGIN_TYPE, sinkProperties, null));
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink).addConnection(source.getName(), sink.getName()).build();
    // create the pipeline
    ApplicationId pipelineId = NamespaceId.DEFAULT.app("sparkSinkTestPipeline");
    ApplicationManager appManager = deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));
    // write the input
    Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
    DataSetManager<Table> inputManager = getDataset(inputName);
    List<StructuredRecord> inputRecords = new ArrayList<>();
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello World").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Hal").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Sam").build());
    MockSource.writeInput(inputManager, inputRecords);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 4, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> outputManager = getDataset(outputName);
    KeyValueTable output = outputManager.get();
    Assert.assertEquals(3L, Bytes.toLong(output.read("Hello")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("World")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("my")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("name")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("is")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("Hal")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("Sam")));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)
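
The WordCountSink plugin itself lives in the cdap test sources. The sketch below shows one way such a SparkSink could be written; it assumes the SparkSink prepareRun/run contract from cdap-etl-api-spark, the context's saveAsDataset(...) method, and a hypothetical Conf PluginConfig carrying the "field" and "tableName" properties set above:

public class WordCountSink extends SparkSink<StructuredRecord> {
    public static final String NAME = "WordCount";
    // Hypothetical PluginConfig holding the "field" and "tableName" properties
    private final Conf conf;

    public WordCountSink(Conf conf) {
        this.conf = conf;
    }

    @Override
    public void prepareRun(SparkPluginContext context) throws Exception {
        // No preparation needed for this sketch
    }

    @Override
    public void run(SparkExecutionPluginContext context, JavaRDD<StructuredRecord> input) throws Exception {
        // Split the configured text field into words and count occurrences
        JavaPairRDD<byte[], byte[]> counts = input
            .flatMap(record -> Arrays.asList(((String) record.get(conf.field)).split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1L))
            .reduceByKey(Long::sum)
            .mapToPair(t -> new Tuple2<>(Bytes.toBytes(t._1()), Bytes.toBytes(t._2())));
        // Write the counts to the KeyValueTable that the test reads back
        context.saveAsDataset(counts, conf.tableName);
    }
}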

Example 78 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class StandaloneDatasetDefinition method getDataset.

@Override
public StandaloneDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    // Look up the spec of the embedded table registered under "objects" at configure time
    DatasetSpecification kvTableSpec = spec.getSpecification("objects");
    // Instantiate the underlying KeyValueTable and wrap it in the composite dataset
    KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
    return new StandaloneDataset(spec.getName(), table);
}
Also used : KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification)
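
getDataset(...) depends on an embedded spec created at configure time. A minimal sketch, assuming StandaloneDatasetDefinition follows the usual composite-definition pattern and extends AbstractDatasetDefinition, of how the matching configure(...) could register the "objects" table:

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
    // Embed a KeyValueTable spec under "objects"; getDataset(...) above
    // retrieves it via spec.getSpecification("objects")
    return DatasetSpecification.builder(instanceName, getName())
        .properties(properties.getProperties())
        .datasets(tableDef.configure("objects", properties))
        .build();
}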

Example 79 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class TestFrameworkTestRun method executeWorkflow.

private String executeWorkflow(ApplicationManager applicationManager, Map<String, String> additionalParams, int expectedComplete) throws Exception {
    WorkflowManager wfManager = applicationManager.getWorkflowManager(WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
    Map<String, String> runtimeArgs = new HashMap<>();
    File waitFile = new File(TMP_FOLDER.newFolder(), "/wait.file");
    File doneFile = new File(TMP_FOLDER.newFolder(), "/done.file");
    runtimeArgs.put("input.path", "input");
    runtimeArgs.put("output.path", "output");
    runtimeArgs.put("wait.file", waitFile.getAbsolutePath());
    runtimeArgs.put("done.file", doneFile.getAbsolutePath());
    runtimeArgs.putAll(additionalParams);
    wfManager.start(runtimeArgs);
    // Wait until the custom action in the Workflow is triggered.
    while (!waitFile.exists()) {
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // Now the Workflow should be in the RUNNING state. Get its run id.
    List<RunRecord> history = wfManager.getHistory(ProgramRunStatus.RUNNING);
    Assert.assertEquals(1, history.size());
    String runId = history.get(0).getPid();
    // Get the local datasets for this Workflow run
    DataSetManager<KeyValueTable> localDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET + "." + runId));
    Assert.assertEquals("2", Bytes.toString(localDataset.get().read("text")));
    DataSetManager<FileSet> fileSetDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET + "." + runId));
    Assert.assertNotNull(fileSetDataset.get());
    // Local datasets should not exist at the namespace level
    localDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET));
    Assert.assertNull(localDataset.get());
    fileSetDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET));
    Assert.assertNull(fileSetDataset.get());
    // Verify that the workflow hasn't completed on its own before we signal it to
    history = wfManager.getHistory(ProgramRunStatus.RUNNING);
    Assert.assertEquals(1, history.size());
    // Signal the Workflow to continue
    doneFile.createNewFile();
    // Wait for workflow to finish
    wfManager.waitForRuns(ProgramRunStatus.COMPLETED, expectedComplete, 1, TimeUnit.MINUTES);
    Map<String, WorkflowNodeStateDetail> nodeStateDetailMap = wfManager.getWorkflowNodeStates(runId);
    Map<String, String> workflowMetricsContext = new HashMap<>();
    workflowMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
    workflowMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
    workflowMetricsContext.put(Constants.Metrics.Tag.WORKFLOW, WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
    workflowMetricsContext.put(Constants.Metrics.Tag.RUN_ID, runId);
    Map<String, String> writerContext = new HashMap<>(workflowMetricsContext);
    writerContext.put(Constants.Metrics.Tag.NODE, WorkflowAppWithLocalDatasets.LocalDatasetWriter.class.getSimpleName());
    Assert.assertEquals(2, getMetricsManager().getTotalMetric(writerContext, "user.num.lines"));
    Map<String, String> wfSparkMetricsContext = new HashMap<>(workflowMetricsContext);
    wfSparkMetricsContext.put(Constants.Metrics.Tag.NODE, "JavaSparkCSVToSpaceConverter");
    Assert.assertEquals(2, getMetricsManager().getTotalMetric(wfSparkMetricsContext, "user.num.lines"));
    // check in spark context
    Map<String, String> sparkMetricsContext = new HashMap<>();
    sparkMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
    sparkMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
    sparkMetricsContext.put(Constants.Metrics.Tag.SPARK, "JavaSparkCSVToSpaceConverter");
    sparkMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("JavaSparkCSVToSpaceConverter").getRunId());
    Assert.assertEquals(2, getMetricsManager().getTotalMetric(sparkMetricsContext, "user.num.lines"));
    Map<String, String> appMetricsContext = new HashMap<>();
    appMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
    appMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
    // The app-level metrics context should have the sum of the custom action and Spark metrics.
    Assert.assertEquals(4, getMetricsManager().getTotalMetric(appMetricsContext, "user.num.lines"));
    Map<String, String> wfMRMetricsContext = new HashMap<>(workflowMetricsContext);
    wfMRMetricsContext.put(Constants.Metrics.Tag.NODE, "WordCount");
    Assert.assertEquals(7, getMetricsManager().getTotalMetric(wfMRMetricsContext, "user.num.words"));
    // mr metrics context
    Map<String, String> mrMetricsContext = new HashMap<>();
    mrMetricsContext.put(Constants.Metrics.Tag.NAMESPACE, testSpace.getNamespace());
    mrMetricsContext.put(Constants.Metrics.Tag.APP, applicationManager.getInfo().getName());
    mrMetricsContext.put(Constants.Metrics.Tag.MAPREDUCE, "WordCount");
    mrMetricsContext.put(Constants.Metrics.Tag.RUN_ID, nodeStateDetailMap.get("WordCount").getRunId());
    Assert.assertEquals(7, getMetricsManager().getTotalMetric(mrMetricsContext, "user.num.words"));
    final Map<String, String> readerContext = new HashMap<>(workflowMetricsContext);
    readerContext.put(Constants.Metrics.Tag.NODE, "readerAction");
    Tasks.waitFor(6L, () -> getMetricsManager().getTotalMetric(readerContext, "user.unique.words"), 60, TimeUnit.SECONDS);
    return runId;
}
Also used : FileSet(io.cdap.cdap.api.dataset.lib.FileSet) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) IOException(java.io.IOException) ConflictException(io.cdap.cdap.common.ConflictException) WorkflowNodeStateDetail(io.cdap.cdap.proto.WorkflowNodeStateDetail) RunRecord(io.cdap.cdap.proto.RunRecord) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) File(java.io.File)
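
The run id suffix works because Workflow-local datasets are created per run as "<dataset name>.<runId>" and cleaned up when the run ends, which is also why the namespace-level lookups above return null. A hedged sketch of how the workflow in WorkflowAppWithLocalDatasets could declare these local datasets (node names match the metrics assertions above; ReaderAction is a hypothetical name for the "readerAction" node, and the actual app code lives in the cdap test sources):

public class WordCountWorkflow extends AbstractWorkflow {
    @Override
    protected void configure() {
        setName(WorkflowAppWithLocalDatasets.WORKFLOW_NAME);
        // Local datasets exist only for the duration of a run, stored as "<name>.<runId>"
        createLocalDataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET, KeyValueTable.class);
        createLocalDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, FileSet.class);
        addAction(new LocalDatasetWriter()); // writes the "text" key asserted above
        addSpark("JavaSparkCSVToSpaceConverter");
        addMapReduce("WordCount");
        addAction(new ReaderAction()); // hypothetical action emitting "user.unique.words"
    }
}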

Example 80 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class TestFrameworkTestRun method testCrossNSMapperDatasetAccess.

@Category(SlowTests.class)
@Test
public void testCrossNSMapperDatasetAccess() throws Exception {
    NamespaceMeta inputNS = new NamespaceMeta.Builder().setName("inputNS").build();
    NamespaceMeta outputNS = new NamespaceMeta.Builder().setName("outputNS").build();
    getNamespaceAdmin().create(inputNS);
    getNamespaceAdmin().create(outputNS);
    addDatasetInstance(inputNS.getNamespaceId().dataset("table1"), "keyValueTable");
    addDatasetInstance(outputNS.getNamespaceId().dataset("table2"), "keyValueTable");
    DataSetManager<KeyValueTable> tableManager = getDataset(inputNS.getNamespaceId().dataset("table1"));
    KeyValueTable inputTable = tableManager.get();
    inputTable.write("hello", "world");
    tableManager.flush();
    ApplicationManager appManager = deployApplication(DatasetCrossNSAccessWithMAPApp.class);
    Map<String, String> argsForMR = ImmutableMap.of(DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS, inputNS.getName(), DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME, "table1", DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS, outputNS.getName(), DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME, "table2");
    MapReduceManager mrManager = appManager.getMapReduceManager(DatasetCrossNSAccessWithMAPApp.MAPREDUCE_PROGRAM).start(argsForMR);
    mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    appManager.stopAll();
    DataSetManager<KeyValueTable> outTableManager = getDataset(outputNS.getNamespaceId().dataset("table2"));
    verifyMapperJobOutput(DatasetCrossNSAccessWithMAPApp.class, outTableManager);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) MapReduceManager(io.cdap.cdap.test.MapReduceManager) NamespaceMeta(io.cdap.cdap.proto.NamespaceMeta) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
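
verifyMapperJobOutput(...) is a shared helper defined elsewhere in TestFrameworkTestRun. A minimal hypothetical sketch consistent with the data written above, assuming the MapReduce program copies rows verbatim across namespaces:

private void verifyMapperJobOutput(Class<?> appClass, DataSetManager<KeyValueTable> outTableManager) {
    KeyValueTable outputTable = outTableManager.get();
    // The MapReduce program copied "hello" -> "world" from inputNS.table1 to outputNS.table2
    Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
}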

Aggregations

KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 122
Test (org.junit.Test): 65
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 59
HashMap (java.util.HashMap): 27
SparkManager (io.cdap.cdap.test.SparkManager): 26
Table (io.cdap.cdap.api.dataset.table.Table): 21
TransactionExecutor (org.apache.tephra.TransactionExecutor): 20
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 19
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 18
ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 18
KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue): 14
ServiceManager (io.cdap.cdap.test.ServiceManager): 14
IOException (java.io.IOException): 14
ArrayList (java.util.ArrayList): 14
Location (org.apache.twill.filesystem.Location): 14
ImmutableMap (com.google.common.collect.ImmutableMap): 13
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 13
File (java.io.File): 13
URL (java.net.URL): 12