Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata: the class PipelineTest, method testWordCountSparkSink.
@SuppressWarnings("ConstantConditions")
@Test
public void testWordCountSparkSink() throws Exception {
String inputName = "sparkSinkInput";
String outputName = "sparkSinkOutput";
// create the pipeline config
ETLStage source = new ETLStage("source", MockSource.getPlugin(inputName));
Map<String, String> sinkProperties = new HashMap<>();
sinkProperties.put("field", "text");
sinkProperties.put("tableName", outputName);
ETLStage sink = new ETLStage("sink", new ETLPlugin(WordCountSink.NAME, SparkSink.PLUGIN_TYPE, sinkProperties, null));
ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink).addConnection(source.getName(), sink.getName()).build();
// create the pipeline
ApplicationId pipelineId = NamespaceId.DEFAULT.app("sparkSinkTestPipeline");
ApplicationManager appManager = deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));
// write the input
Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
DataSetManager<Table> inputManager = getDataset(inputName);
List<StructuredRecord> inputRecords = new ArrayList<>();
inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello World").build());
inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Hal").build());
inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Sam").build());
MockSource.writeInput(inputManager, inputRecords);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForFinish(4, TimeUnit.MINUTES);
DataSetManager<KeyValueTable> outputManager = getDataset(outputName);
KeyValueTable output = outputManager.get();
Assert.assertEquals(3L, Bytes.toLong(output.read("Hello")));
Assert.assertEquals(1L, Bytes.toLong(output.read("World")));
Assert.assertEquals(2L, Bytes.toLong(output.read("my")));
Assert.assertEquals(2L, Bytes.toLong(output.read("name")));
Assert.assertEquals(2L, Bytes.toLong(output.read("is")));
Assert.assertEquals(1L, Bytes.toLong(output.read("Hal")));
Assert.assertEquals(1L, Bytes.toLong(output.read("Sam")));
}
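The expected counts in the assertions above follow directly from splitting the three input sentences on whitespace. A minimal plain-Java sketch (illustrative only, not part of the test) that reproduces those numbers:

import java.util.HashMap;
import java.util.Map;

public class ExpectedWordCounts {
  public static void main(String[] args) {
    String[] sentences = {"Hello World", "Hello my name is Hal", "Hello my name is Sam"};
    Map<String, Long> counts = new HashMap<>();
    for (String sentence : sentences) {
      for (String word : sentence.split("\\s+")) {
        counts.merge(word, 1L, Long::sum);
      }
    }
    // Prints {Hello=3, World=1, my=2, name=2, is=2, Hal=1, Sam=1} (iteration order may vary)
    System.out.println(counts);
  }
}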
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata: the class SparkServiceIntegrationTestRun, method testSparkWithService.
@Test
public void testSparkWithService() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
  startService(applicationManager);
  // run the Spark program and wait for it to complete
  SparkManager sparkManager = applicationManager
    .getSparkManager(TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  // verify that the squares of 1..5 were written to the "result" KeyValueTable
  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  KeyValueTable results = datasetManager.get();
  for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertEquals(i * i, Integer.parseInt(Bytes.toString(results.read(key))));
  }
}
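The assertions above imply that the Spark program stores the square of each number 1 through 5 as a UTF-8 string, keyed by the number itself. A hypothetical sketch of that writing side, assuming a KeyValueTable instance is available from the program's runtime context (the helper name and context are illustrative, not taken from TestSparkServiceIntegrationApp):

// Hypothetical helper: writes squares of 1..5 as string values, the shape the test reads back
// with Bytes.toString(results.read(key)) and Integer.parseInt(...).
private static void writeSquares(KeyValueTable result) {
  for (int i = 1; i <= 5; i++) {
    result.write(Bytes.toBytes(String.valueOf(i)), Bytes.toBytes(String.valueOf(i * i)));
  }
}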
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata: the class SparkTestRun, method checkOutputData.
private void checkOutputData(DataSetManager<KeyValueTable> manager) {
  KeyValueTable count = manager.get();
  // read the output and verify that each test string maps to its character count
  byte[] val = count.read(Bytes.toBytes(TEST_STRING_1));
  Assert.assertNotNull(val);
  Assert.assertEquals(TEST_STRING_1.length(), Bytes.toInt(val));
  val = count.read(Bytes.toBytes(TEST_STRING_2));
  Assert.assertNotNull(val);
  Assert.assertEquals(TEST_STRING_2.length(), Bytes.toInt(val));
}
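For this check to pass, the writer must have stored each test string's character count as a 4-byte int keyed by the string itself. A hypothetical sketch of that writing side (illustrative only, not the actual CharCountProgram source):

// Hypothetical helper: stores the string's length with Bytes.toBytes(int) so that
// Bytes.toInt(...) in checkOutputData decodes it.
private static void writeCharCount(KeyValueTable count, String value) {
  count.write(Bytes.toBytes(value), Bytes.toBytes(value.length()));
}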
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata: the class SparkTestRun, method testTransaction.
@Test
public void testTransaction() throws Exception {
  ApplicationManager applicationManager = deploy(TestSparkApp.class);
  StreamManager streamManager = getStreamManager("SparkStream");
  // Write some sentences to the stream
  streamManager.send("red fox");
  streamManager.send("brown fox");
  streamManager.send("grey fox");
  streamManager.send("brown bear");
  streamManager.send("black bear");
  // Run the spark program
  SparkManager sparkManager = applicationManager.getSparkManager(TransactionSpark.class.getSimpleName());
  sparkManager.start(ImmutableMap.of(
    "source.stream", "SparkStream",
    "keyvalue.table", "KeyValueTable",
    "result.all.dataset", "SparkResult",
    "result.threshold", "2",
    "result.threshold.dataset", "SparkThresholdResult"));
  // Verify the result from the dataset before the Spark program terminates
  final DataSetManager<KeyValueTable> resultManager = getDataset("SparkThresholdResult");
  final KeyValueTable resultTable = resultManager.get();
  // Expect the threshold result dataset, with threshold >= 2, to contain [brown, fox, bear]
  Tasks.waitFor(ImmutableSet.of("brown", "fox", "bear"), new Callable<Set<String>>() {
    @Override
    public Set<String> call() throws Exception {
      // Flush to start a new transaction so each poll sees the latest committed data
      resultManager.flush();
      LOG.info("Reading from threshold result");
      try (CloseableIterator<KeyValue<byte[], byte[]>> itor = resultTable.scan(null, null)) {
        return ImmutableSet.copyOf(Iterators.transform(itor, new Function<KeyValue<byte[], byte[]>, String>() {
          @Override
          public String apply(KeyValue<byte[], byte[]> input) {
            String word = Bytes.toString(input.getKey());
            LOG.info("{}, {}", word, Bytes.toInt(input.getValue()));
            return word;
          }
        }));
      }
    }
  }, 3, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  // Stop the long-running program and wait for the run to be marked KILLED
  sparkManager.stop();
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
}
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata: the class SparkTestRun, method testSparkWithObjectStore.
@Test
public void testSparkWithObjectStore() throws Exception {
  ApplicationManager applicationManager = deploy(SparkAppUsingObjectStore.class);
  // populate the "keys" ObjectStore that the Spark program reads from
  DataSetManager<ObjectStore<String>> keysManager = getDataset("keys");
  prepareInputData(keysManager);
  SparkManager sparkManager = applicationManager.getSparkManager(CharCountProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
  // verify the character counts written to the "count" KeyValueTable
  DataSetManager<KeyValueTable> countManager = getDataset("count");
  checkOutputData(countManager);
  // validate that the "totals" table emitted dataset operation metrics:
  // one read + one write in beforeSubmit(), plus an increment (= read + write) in the main method -> 4
  Tasks.waitFor(4L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      Collection<MetricTimeSeries> metrics = getMetricsManager().query(new MetricDataQuery(
        0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE,
        "system." + Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM,
        ImmutableMap.of(
          Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getNamespace(),
          Constants.Metrics.Tag.APP, SparkAppUsingObjectStore.class.getSimpleName(),
          Constants.Metrics.Tag.SPARK, CharCountProgram.class.getSimpleName(),
          Constants.Metrics.Tag.DATASET, "totals"),
        Collections.<String>emptyList()));
      if (metrics.isEmpty()) {
        return 0L;
      }
      Assert.assertEquals(1, metrics.size());
      MetricTimeSeries ts = metrics.iterator().next();
      Assert.assertEquals(1, ts.getTimeValues().size());
      return ts.getTimeValues().get(0).getValue();
    }
  }, 10L, TimeUnit.SECONDS, 50L, TimeUnit.MILLISECONDS);
}