Example 1 with TimeseriesTable

use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

From the class SparkStreamingTestRun, method test.

@Test
public void test() throws Exception {
    File checkpointDir = TEMP_FOLDER.newFolder();
    KafkaPublisher publisher = KAFKA_TESTER.getKafkaClient().getPublisher(KafkaPublisher.Ack.LEADER_RECEIVED, Compression.NONE);
    ApplicationManager appManager = deployApplication(TestSparkApp.class);
    Map<String, String> args = ImmutableMap.of("checkpoint.path", checkpointDir.getAbsolutePath(), "kafka.brokers", KAFKA_TESTER.getBrokerService().getBrokerList(), "kafka.topics", "testtopic", "result.dataset", "TimeSeriesResult");
    SparkManager manager = appManager.getSparkManager(KafkaSparkStreaming.class.getSimpleName());
    manager.start(args);
    // Send 100 messages over 5 seconds
    for (int i = 0; i < 100; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // Sum up everything from the TimeseriesTable. The word "Message" should have a count of 100,
    // while each number (0-99) should have a count of 1
    final DataSetManager<TimeseriesTable> tsTableManager = getDataset("TimeSeriesResult");
    final TimeseriesTable tsTable = tsTableManager.get();
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            tsTableManager.flush();
            return getCounts("Message", tsTable);
        }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    for (int i = 0; i < 100; i++) {
        final int finalI = i;
        Tasks.waitFor(1L, new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                tsTableManager.flush();
                return getCounts(Integer.toString(finalI), tsTable);
            }
        }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    }
    manager.stop();
    manager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
    // Send 100 more messages without pause
    for (int i = 100; i < 200; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
    }
    // Start the streaming app again. It should resume from where it left off because of the checkpoint
    manager.start(args);
    // Expects "Message" having count = 200.
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            tsTableManager.flush();
            return getCounts("Message", tsTable);
        }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    // Expects each number (0-199) to have a count of 1
    for (int i = 0; i < 200; i++) {
        final int finalI = i;
        Tasks.waitFor(1L, new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                tsTableManager.flush();
                return getCounts(Integer.toString(finalI), tsTable);
            }
        }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    }
    manager.stop();
    manager.waitForRuns(ProgramRunStatus.KILLED, 2, 60, TimeUnit.SECONDS);
}
Also used: ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) KafkaSparkStreaming(io.cdap.cdap.spark.app.KafkaSparkStreaming) File(java.io.File) KafkaPublisher(org.apache.twill.kafka.client.KafkaPublisher) Test(org.junit.Test)
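
The getCounts helper is referenced above but not shown on this page. A minimal sketch of what it plausibly looks like, assuming the Spark program writes each word as a TimeseriesTable row keyed by the word, with long-encoded counts as values (the body below is an illustration under that assumption, not the verbatim CDAP source):

private long getCounts(String word, TimeseriesTable tsTable) {
    // Sum the long-encoded value of every entry stored under this word's row key,
    // scanning the full time range.
    long result = 0;
    Iterator<TimeseriesTable.Entry> entries = tsTable.read(Bytes.toBytes(word), 0, Long.MAX_VALUE);
    while (entries.hasNext()) {
        result += Bytes.toLong(entries.next().getValue());
    }
    return result;
}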

Example 2 with TimeseriesTable

use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

From the class MapReduceProgramRunnerTest, method testSuccess.

private void testSuccess(boolean frequentFlushing) throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // We need to start a tx context and do a "get" on all datasets so that they are in the datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable table = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
    final Table counters = datasetCache.getDataset("counters");
    final Table countersFromContext = datasetCache.getDataset("countersFromContext");
    // 1) fill test data
    fillTestInputData(txExecutorFactory, table, false);
    // 2) run job
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
    final long stop = System.currentTimeMillis();
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Map<String, Long> expected = Maps.newHashMap();
            // Note: not all records contribute to the sums, since a filter on tags and on ts={1..3} is applied
            expected.put("tag1", 18L);
            expected.put("tag2", 3L);
            expected.put("tag3", 18L);
            Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
            int count = 0;
            while (agg.hasNext()) {
                TimeseriesTable.Entry entry = agg.next();
                String tag = Bytes.toString(entry.getTags()[0]);
                Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
                count++;
            }
            Assert.assertEquals(expected.size(), count);
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
            Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
        }
    });
    datasetCache.dismissTransactionContext();
// TODO: verify metrics. This will be possible after the refactoring for CDAP-765
}
Also used: Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) TransactionExecutor(org.apache.tephra.TransactionExecutor) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(io.cdap.cdap.api.dataset.table.Get) Iterator(java.util.Iterator) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
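
fillTestInputData is another helper that lives outside this snippet. A plausible sketch, assuming it writes long-encoded, tagged entries at ts={1..3} for AggregateTimeseriesByTag to sum per tag, plus a poison-pill value when withBadData is true (the values and tags below are illustrative, not the verbatim CDAP source):

private void fillTestInputData(TransactionExecutorFactory txExecutorFactory,
                               final TimeseriesTable table,
                               final boolean withBadData) throws Exception {
    // Writes go through a transaction, just like the verification block in the test
    Transactions.createTransactionExecutor(txExecutorFactory, table).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // Entry(key, value, timestamp, tags...): the MR job sums the values per tag
            table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS, Bytes.toBytes(3L), 1,
                                                  Bytes.toBytes("tag1"), Bytes.toBytes("tag2"), Bytes.toBytes("tag3")));
            if (withBadData) {
                // A value the map tasks recognize as a signal to emulate failure
                table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS, Bytes.toBytes("badData"), 2,
                                                      Bytes.toBytes("tag1")));
            }
        }
    });
}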

Example 3 with TimeseriesTable

use of io.cdap.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

From the class MapReduceProgramRunnerTest, method testFailure.

// TODO: this tests failure in Map tasks. We also need to test failure in a Reduce task, and a job killed by the user.
private void testFailure(boolean frequentFlushing) throws Exception {
    // We want to verify that when a MapReduce job fails:
    // * things written in initialize() remain and are visible to others
    // * things written in tasks are not visible to others (TODO AAA: do invalidate)
    // * things written in onFinish() remain and are visible to others
    // NOTE: this test is similar to the testTimeSeriesRecordsCount() test. We intentionally put some "bad data"
    // here, which the map tasks recognize as a signal to emulate failure
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // We need to start a tx context and do a "get" on all datasets so that they are in the datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable table = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
    final Table counters = datasetCache.getDataset("counters");
    final Table countersFromContext = datasetCache.getDataset("countersFromContext");
    // 1) fill test data
    fillTestInputData(txExecutorFactory, table, true);
    // 2) run job
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, false);
    final long stop = System.currentTimeMillis();
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // Data should be rolled back (TODO: test that partially written data is rolled back too)
            Assert.assertFalse(table.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());
            // ...but data written in beforeSubmit and onFinish is available to others
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
            Assert.assertEquals(0, counters.get(new Get("mapper")).getLong("records", 0));
            Assert.assertEquals(0, counters.get(new Get("reducer")).getLong("records", 0));
            Assert.assertEquals(0, countersFromContext.get(new Get("mapper")).getLong("records", 0));
            Assert.assertEquals(0, countersFromContext.get(new Get("reducer")).getLong("records", 0));
        }
    });
    datasetCache.dismissTransactionContext();
}
Also used: Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(io.cdap.cdap.api.dataset.table.Get) TransactionExecutor(org.apache.tephra.TransactionExecutor)
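
Both MapReduce tests resolve their datasets by name, which assumes the deployed application declared them. A minimal sketch of such a configure() method (the class name ExampleApp is hypothetical; in these tests the actual declarations live in AppWithMapReduce):

public class ExampleApp extends AbstractApplication {

    @Override
    public void configure() {
        // Datasets the tests look up via datasetCache.getDataset(...)
        createDataset("timeSeries", TimeseriesTable.class, DatasetProperties.EMPTY);
        createDataset("beforeSubmit", KeyValueTable.class, DatasetProperties.EMPTY);
        createDataset("onFinish", KeyValueTable.class, DatasetProperties.EMPTY);
        createDataset("counters", Table.class, DatasetProperties.EMPTY);
        createDataset("countersFromContext", Table.class, DatasetProperties.EMPTY);
        // The MapReduce program exercised by runProgram(...)
        addMapReduce(new AggregateTimeseriesByTag());
    }
}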

Aggregations

TimeseriesTable (io.cdap.cdap.api.dataset.lib.TimeseriesTable): 3 uses
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 2 uses
Get (io.cdap.cdap.api.dataset.table.Get): 2 uses
Table (io.cdap.cdap.api.dataset.table.Table): 2 uses
ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 2 uses
TransactionExecutor (org.apache.tephra.TransactionExecutor): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 1 use
KafkaSparkStreaming (io.cdap.cdap.spark.app.KafkaSparkStreaming): 1 use
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 1 use
SparkManager (io.cdap.cdap.test.SparkManager): 1 use
File (java.io.File): 1 use
HashMap (java.util.HashMap): 1 use
Iterator (java.util.Iterator): 1 use
Map (java.util.Map): 1 use
KafkaPublisher (org.apache.twill.kafka.client.KafkaPublisher): 1 use
Test (org.junit.Test): 1 use