Use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.
In the class SparkStreamingTestRun, the method test:
@Test
public void test() throws Exception {
  File checkpointDir = TEMP_FOLDER.newFolder();
  KafkaPublisher publisher = KAFKA_TESTER.getKafkaClient()
    .getPublisher(KafkaPublisher.Ack.LEADER_RECEIVED, Compression.NONE);

  ApplicationManager appManager = deployApplication(TestSparkApp.class);
  Map<String, String> args = ImmutableMap.of(
    "checkpoint.path", checkpointDir.getAbsolutePath(),
    "kafka.brokers", KAFKA_TESTER.getBrokerService().getBrokerList(),
    "kafka.topics", "testtopic",
    "result.dataset", "TimeSeriesResult");

  SparkManager manager = appManager.getSparkManager(KafkaSparkStreaming.class.getSimpleName());
  manager.start(args);

  // Send 100 messages over 5 seconds
  for (int i = 0; i < 100; i++) {
    publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
    TimeUnit.MILLISECONDS.sleep(50);
  }

  // Sum up everything from the TimeseriesTable. The "Message" should have count 100, while each
  // number (0-99) should have count of 1
  final DataSetManager<TimeseriesTable> tsTableManager = getDataset("TimeSeriesResult");
  final TimeseriesTable tsTable = tsTableManager.get();
  Tasks.waitFor(100L, new Callable<Long>() {
    @Override
    public Long call() throws Exception {
      tsTableManager.flush();
      return getCounts("Message", tsTable);
    }
  }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);

  for (int i = 0; i < 100; i++) {
    final int finalI = i;
    Tasks.waitFor(1L, new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        tsTableManager.flush();
        return getCounts(Integer.toString(finalI), tsTable);
      }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  }

  manager.stop();
  manager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);

  // Send 100 more messages without pause
  for (int i = 100; i < 200; i++) {
    publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
  }

  // Start the streaming app again. It should resume from where it left off because of the checkpoint.
  manager.start(args);
// Expects "Message" having count = 200.
Tasks.waitFor(100L, new Callable<Long>() {
@Override
public Long call() throws Exception {
tsTableManager.flush();
return getCounts("Message", tsTable);
}
}, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  // Expects each number (0-199) to have a count of 1
  for (int i = 0; i < 200; i++) {
    final int finalI = i;
    Tasks.waitFor(1L, new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        tsTableManager.flush();
        return getCounts(Integer.toString(finalI), tsTable);
      }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
  }

  manager.stop();
  manager.waitForRuns(ProgramRunStatus.KILLED, 2, 60, TimeUnit.SECONDS);
}
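The assertions above call a getCounts helper that is not part of this excerpt. A minimal sketch of what it could look like, assuming the streaming program stores one TimeseriesTable entry per word and micro-batch timestamp, with the count encoded as an 8-byte long:

// Sketch of a possible getCounts helper (not from the original source):
// sums every count recorded for the given word across all timestamps.
private long getCounts(String word, TimeseriesTable tsTable) {
  long count = 0;
  Iterator<TimeseriesTable.Entry> entries = tsTable.read(Bytes.toBytes(word), 0, Long.MAX_VALUE);
  while (entries.hasNext()) {
    count += Bytes.toLong(entries.next().getValue());
  }
  return count;
}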
Use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.
In the class MapReduceProgramRunnerTest, the method testSuccess:
private void testSuccess(boolean frequentFlushing) throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");

  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, false);

  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
  final long stop = System.currentTimeMillis();

  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Map<String, Long> expected = Maps.newHashMap();
        // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
        expected.put("tag1", 18L);
        expected.put("tag2", 3L);
        expected.put("tag3", 18L);
        Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
        int count = 0;
        while (agg.hasNext()) {
          TimeseriesTable.Entry entry = agg.next();
          String tag = Bytes.toString(entry.getTags()[0]);
          Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
          count++;
        }
        Assert.assertEquals(expected.size(), count);
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                 beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                 onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
      }
    });
  datasetCache.dismissTransactionContext();

  // todo: verify metrics. Will be possible after refactor for CDAP-765
}
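The "filter by tag" note above refers to the aggregation job restricting its input to records with a given tag and time range. For reference, a tag-filtered read against a TimeseriesTable can look like the following sketch; the tag and time range here are illustrative assumptions, not taken from AppWithMapReduce:

// Sketch only: sum the values of entries in [start, stop] that were written with the "tag1" tag.
long sum = 0;
Iterator<TimeseriesTable.Entry> tagged =
  table.read(AggregateMetricsByTag.BY_TAGS, start, stop, Bytes.toBytes("tag1"));
while (tagged.hasNext()) {
  // only entries carrying the requested tag are returned by this read
  sum += Bytes.toLong(tagged.next().getValue());
}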
Use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.
In the class MapReduceProgramRunnerTest, the method testFailure:
// TODO: this tests failure in Map tasks. We also need to test: failure in a Reduce task, kill of a job by the user.
private void testFailure(boolean frequentFlushing) throws Exception {
  // We want to verify that when a mapreduce job fails:
  // * things written in initialize() remain and are visible to others
  // * things written in tasks are not visible to others TODO AAA: do invalidate
  // * things written in onFinish() remain and are visible to others
  // NOTE: the code of this test is similar to the testTimeSeriesRecordsCount() test. We intentionally put some
  // "bad data" here to be recognized by map tasks as a message to emulate failure
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");

  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, true);

  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, false);
  final long stop = System.currentTimeMillis();

  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // data should be rolled back todo: test that partially written data is rolled back too
        Assert.assertFalse(table.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());
        // but data written in beforeSubmit and onFinish remains available to others
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                 beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                 onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertEquals(0, counters.get(new Get("mapper")).getLong("records", 0));
        Assert.assertEquals(0, counters.get(new Get("reducer")).getLong("records", 0));
        Assert.assertEquals(0, countersFromContext.get(new Get("mapper")).getLong("records", 0));
        Assert.assertEquals(0, countersFromContext.get(new Get("reducer")).getLong("records", 0));
      }
    });
  datasetCache.dismissTransactionContext();
}
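Both test methods fill the table through a fillTestInputData helper that is not shown in this excerpt. A hypothetical sketch of how tagged test entries could be written inside a transaction; the concrete values, timestamps, and tags below are assumptions for illustration, not the data used by AppWithMapReduce:

// Sketch only: writes a few tagged entries inside a transaction so the MapReduce job has input.
private void fillTestInputData(TransactionExecutorFactory txExecutorFactory,
                               final TimeseriesTable table,
                               final boolean withBadData) throws Exception {
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // Entry(counter, value, timestamp, tags...): one value per timestamp and tag
        table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS,
                                              Bytes.toBytes(3L), 1L, Bytes.toBytes("tag1")));
        table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS,
                                              Bytes.toBytes(3L), 2L, Bytes.toBytes("tag2")));
        // when withBadData is true, the real helper presumably also writes a record that
        // map tasks recognize as a trigger to fail the job
      }
    });
}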