
Example 51 with KeyValueTable

Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class PipelineTest, method testWordCountSparkSink.

@SuppressWarnings("ConstantConditions")
@Test
public void testWordCountSparkSink() throws Exception {
    String inputName = "sparkSinkInput";
    String outputName = "sparkSinkOutput";
    // create the pipeline config
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputName));
    Map<String, String> sinkProperties = new HashMap<>();
    sinkProperties.put("field", "text");
    sinkProperties.put("tableName", outputName);
    ETLStage sink = new ETLStage("sink", new ETLPlugin(WordCountSink.NAME, SparkSink.PLUGIN_TYPE, sinkProperties, null));
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addConnection(source.getName(), sink.getName())
        .build();
    // create the pipeline
    ApplicationId pipelineId = NamespaceId.DEFAULT.app("sparkSinkTestPipeline");
    ApplicationManager appManager = deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));
    // write the input
    Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
    DataSetManager<Table> inputManager = getDataset(inputName);
    List<StructuredRecord> inputRecords = new ArrayList<>();
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello World").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Hal").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Sam").build());
    MockSource.writeInput(inputManager, inputRecords);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForFinish(4, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> outputManager = getDataset(outputName);
    KeyValueTable output = outputManager.get();
    Assert.assertEquals(3L, Bytes.toLong(output.read("Hello")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("World")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("my")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("name")));
    Assert.assertEquals(2L, Bytes.toLong(output.read("is")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("Hal")));
    Assert.assertEquals(1L, Bytes.toLong(output.read("Sam")));
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) Table(co.cask.cdap.api.dataset.table.Table) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) HashMap(java.util.HashMap) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ApplicationId(co.cask.cdap.proto.id.ApplicationId) Test(org.junit.Test)
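
The expected counts follow directly from the three input sentences: "Hello" appears in all of them, "my", "name", and "is" appear twice, and the remaining words once. Below is a minimal sketch of the counting the sink is expected to perform, using only standard Java plus a KeyValueTable write; the actual WordCountSink plugin is not shown on this page, and the wordCounts variable is a hypothetical stand-in for the output table.

// Hypothetical sketch: tally words per sentence, then store each count in the
// output KeyValueTable ("wordCounts" stands in for that table).
Map<String, Long> counts = new HashMap<>();
List<String> sentences = Arrays.asList("Hello World", "Hello my name is Hal", "Hello my name is Sam");
for (String sentence : sentences) {
    for (String word : sentence.split(" ")) {
        counts.merge(word, 1L, Long::sum);
    }
}
// counts now holds {Hello=3, World=1, my=2, name=2, is=2, Hal=1, Sam=1},
// matching the assertions above once stored as 8-byte long values.
for (Map.Entry<String, Long> entry : counts.entrySet()) {
    wordCounts.write(Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue().longValue()));
}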

Example 52 with KeyValueTable

Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkServiceIntegrationTestRun, method testSparkWithService.

@Test
public void testSparkWithService() throws Exception {
    ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
    startService(applicationManager);
    SparkManager sparkManager = applicationManager.getSparkManager(TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
    DataSetManager<KeyValueTable> datasetManager = getDataset("result");
    KeyValueTable results = datasetManager.get();
    for (int i = 1; i <= 5; i++) {
        byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
        Assert.assertEquals((i * i), Integer.parseInt(Bytes.toString(results.read(key))));
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)
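
The loop asserts that each key "1" through "5" maps to its square, stored as a UTF-8 string. A minimal sketch of writes that would satisfy those assertions, assuming the Spark program stores string-encoded values in the "result" KeyValueTable; the SparkServiceProgram itself is not shown here, and result is a hypothetical variable for that table.

// Hypothetical sketch of the writes the assertions above expect.
for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    byte[] value = String.valueOf(i * i).getBytes(Charsets.UTF_8);
    result.write(key, value);
}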

Example 53 with KeyValueTable

Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkTestRun, method checkOutputData.

private void checkOutputData(DataSetManager<KeyValueTable> manager) {
    KeyValueTable count = manager.get();
    // read the output and verify the result
    byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
    Assert.assertNotNull(val);
    Assert.assertEquals(TEST_STRING_1.length(), Bytes.toInt(val));
    val = count.read(Bytes.toBytes(TEST_STRING_2));
    Assert.assertNotNull(val);
    Assert.assertEquals(TEST_STRING_2.length(), Bytes.toInt(val));
}
Also used : KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable)
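
checkOutputData only covers the read side; for its assertions to hold, the producing program must have stored each test string's length as a 4-byte int keyed by the string itself. A minimal sketch of that write side, with count standing in for the same KeyValueTable; the producing program is not part of this example.

// Hypothetical write side matching checkOutputData:
// key = the test string, value = its character count as Bytes.toBytes(int).
count.write(Bytes.toBytes(TEST_STRING_1), Bytes.toBytes(TEST_STRING_1.length()));
count.write(Bytes.toBytes(TEST_STRING_2), Bytes.toBytes(TEST_STRING_2.length()));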

Example 54 with KeyValueTable

Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkTestRun, method testTransaction.

@Test
public void testTransaction() throws Exception {
    ApplicationManager applicationManager = deploy(TestSparkApp.class);
    StreamManager streamManager = getStreamManager("SparkStream");
    // Write some sentences to the stream
    streamManager.send("red fox");
    streamManager.send("brown fox");
    streamManager.send("grey fox");
    streamManager.send("brown bear");
    streamManager.send("black bear");
    // Run the spark program
    SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
    sparkManager.start(ImmutableMap.of(
        "source.stream", "SparkStream",
        "keyvalue.table", "KeyValueTable",
        "result.all.dataset", "SparkResult",
        "result.threshold", "2",
        "result.threshold.dataset", "SparkThresholdResult"));
    // Verify result from dataset before the Spark program terminates
    final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
    final KeyValueTable resultTable = resultManager.get();
    // Expect the threshold result dataset, with threshold >=2, contains [brown, fox, bear]
    Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {

        @Override
        public Set<String> call() throws Exception {
            // This is to start a new TX
            resultManager.flush();
            LOG.info("Reading from threshold result");
            try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
                return ImmutableSet.copyOf(Iterators.transform(itor, new Function<KeyValue<byte[], byte[]>, String>() {

                    @Override
                    public String apply(KeyValue<byte[], byte[]> input) {
                        String word = Bytes.toString(input.getKey());
                        LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
                        return word;
                    }
                }));
            }
        }
    }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    sparkManager.stop();
    sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) SparkManager(co.cask.cdap.test.SparkManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileSet(co.cask.cdap.api.dataset.lib.FileSet) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) IOException(java.io.IOException) TransactionSpark(co.cask.cdap.spark.app.TransactionSpark) StreamManager(co.cask.cdap.test.StreamManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Test(org.junit.Test)
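
The expected set of {"brown", "fox", "bear"} follows from counting words across the five stream events and keeping those that reach the threshold of 2: "fox" appears three times, "brown" and "bear" twice, and every other word once. A standalone sketch of that filter in plain Java, independent of the actual TransactionSpark implementation:

// Hypothetical sketch of the threshold filter the test expects.
List<String> events = Arrays.asList("red fox", "brown fox", "grey fox", "brown bear", "black bear");
Map<String, Integer> wordCounts = new HashMap<>();
for (String event : events) {
    for (String word : event.split(" ")) {
        wordCounts.merge(word, 1, Integer::sum);
    }
}
Set<String> aboveThreshold = new HashSet<>();
for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
    if (entry.getValue() >= 2) {
        aboveThreshold.add(entry.getKey());
    }
}
// aboveThreshold is now {brown, fox, bear}, the set Tasks.waitFor polls for.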

Example 55 with KeyValueTable

Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

From the class SparkTestRun, method testSparkWithObjectStore.

@Test
public void testSparkWithObjectStore() throws Exception {
    ApplicationManager applicationManager = deploy(SparkAppUsingObjectStore.class);
    DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
    prepareInputData(keysManager);
    SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
    DataSetManager<KeyValueTable> countManager = getDataset("count");
    checkOutputData(countManager);
    // validate that the table emitted metrics
    // one read + one write in beforeSubmit(), increment (= read + write) in main -> 4
    Tasks.waitFor(4L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(
                0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE,
                "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM,
                ImmutableMap.of(
                    Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(),
                    Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(),
                    Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(),
                    Constants.Metrics.Tag.DATASET, "totals"),
                Collections.<String>emptyList()));
            if (metrics.isEmpty()) {
                return 0L;
            }
            Assert.assertEquals(1, metrics.size());
            MetricTimeSeries ts = metrics.iterator().next();
            Assert.assertEquals(1, ts.getTimeValues().size());
            return ts.getTimeValues().get(0).getValue();
        }
    }, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) SparkAppUsingObjectStore(co.cask.cdap.spark.app.SparkAppUsingObjectStore) SparkManager(co.cask.cdap.test.SparkManager) MetricTimeSeries(co.cask.cdap.api.metrics.MetricTimeSeries) IOException(java.io.IOException) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Collection(java.util.Collection) MetricDataQuery(co.cask.cdap.api.metrics.MetricDataQuery) Test(org.junit.Test)
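
The expected metric value of 4 follows the arithmetic in the comment: one read plus one write before the job runs, and an increment during the run, which amounts to a read followed by a write, giving 2 + 2 = 4 operations on the "totals" dataset. A minimal read-modify-write sketch of what such an increment amounts to, assuming the program keeps a running total under a single key; the CharCountProgram internals are not shown here, and totals and charCount are hypothetical names.

// Hypothetical sketch: a read-modify-write cycle accounts for two dataset
// operations in the OP_COUNT metric, matching the "increment = read + write" note above.
long charCount = 42L; // stand-in for the count computed by the Spark job
byte[] current = totals.read(Bytes.toBytes("total"));
long updated = (current == null ? 0L : Bytes.toLong(current)) + charCount;
totals.write(Bytes.toBytes("total"), Bytes.toBytes(updated));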

Aggregations

KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 69
Test (org.junit.Test): 39
ApplicationManager (co.cask.cdap.test.ApplicationManager): 33
SparkManager (co.cask.cdap.test.SparkManager): 16
IOException (java.io.IOException): 14
StreamManager (co.cask.cdap.test.StreamManager): 13
TransactionExecutor (org.apache.tephra.TransactionExecutor): 11
ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 10
ServiceManager (co.cask.cdap.test.ServiceManager): 9
HashMap (java.util.HashMap): 9
KeyValue (co.cask.cdap.api.dataset.lib.KeyValue): 8
Table (co.cask.cdap.api.dataset.table.Table): 8
WorkflowManager (co.cask.cdap.test.WorkflowManager): 7
ArrayList (java.util.ArrayList): 7
FileSet (co.cask.cdap.api.dataset.lib.FileSet): 6
NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 6
FlowManager (co.cask.cdap.test.FlowManager): 6
MapReduceManager (co.cask.cdap.test.MapReduceManager): 6
Category (org.junit.experimental.categories.Category): 6
Schema (co.cask.cdap.api.data.schema.Schema): 5
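
Taken together, these tests exercise a small surface of KeyValueTable: obtain it through a DataSetManager, write and read byte[] keys and values, flush the manager to pick up changes in a new transaction, and scan a key range with a CloseableIterator. A condensed sketch of that pattern, reusing the test-framework helpers shown in the examples above plus KeyValueTable.write; the dataset name "myTable" is a placeholder.

// Condensed sketch of the KeyValueTable usage pattern from the tests above.
DataSetManager<KeyValueTable> manager = getDataset("myTable"); // "myTable" is a placeholder name
KeyValueTable table = manager.get();
table.write(Bytes.toBytes("key"), Bytes.toBytes(42L));
long stored = Bytes.toLong(table.read(Bytes.toBytes("key")));
manager.flush(); // start a new transaction before re-reading, as in the testTransaction example
try (CloseableIterator<KeyValue<byte[], byte[]>> iterator = table.scan(null, null)) {
    while (iterator.hasNext()) {
        KeyValue<byte[], byte[]> entry = iterator.next();
        // keys and values come back as raw byte[]; decode them with the Bytes helpers
    }
}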