
Example 1 with TimeseriesTable

Use of co.cask.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

Class CoreDatasetsModule, method register:

@Override
public void register(DatasetDefinitionRegistry registry) {
    DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");
    // key-value table, registered under both a short alias and the class name
    DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = new KeyValueTableDefinition("keyValueTable", tableDef);
    registry.add(kvTableDef);
    registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));
    // object stores, built on top of the key-value table
    DatasetDefinition<ObjectStore, DatasetAdmin> objectStoreDef = new ObjectStoreDefinition("objectStore", kvTableDef);
    registry.add(objectStoreDef);
    registry.add(new ObjectStoreDefinition(ObjectStore.class.getName(), kvTableDef));
    registry.add(new IndexedObjectStoreDefinition("indexedObjectStore", tableDef, objectStoreDef));
    registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.class.getName(), tableDef, objectStoreDef));
    // indexed and time-series tables, built on top of the core table
    registry.add(new IndexedTableDefinition("indexedTable", tableDef));
    registry.add(new IndexedTableDefinition(IndexedTable.class.getName(), tableDef));
    registry.add(new TimeseriesTableDefinition("timeseriesTable", tableDef));
    registry.add(new TimeseriesTableDefinition(TimeseriesTable.class.getName(), tableDef));
    registry.add(new CounterTimeseriesTableDefinition("counterTimeseriesTable", tableDef));
    registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.class.getName(), tableDef));
    // in-memory table
    registry.add(new InMemoryTableDefinition("memoryTable"));
}
Also used: ObjectStore(co.cask.cdap.api.dataset.lib.ObjectStore) IndexedObjectStore(co.cask.cdap.api.dataset.lib.IndexedObjectStore) CounterTimeseriesTable(co.cask.cdap.api.dataset.lib.CounterTimeseriesTable) TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) IndexedTable(co.cask.cdap.api.dataset.lib.IndexedTable) Table(co.cask.cdap.api.dataset.table.Table) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin) InMemoryTableDefinition(co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryTableDefinition) CounterTimeseriesTableDefinition(co.cask.cdap.api.dataset.lib.CounterTimeseriesTableDefinition) IndexedTableDefinition(co.cask.cdap.api.dataset.lib.IndexedTableDefinition) IndexedObjectStoreDefinition(co.cask.cdap.api.dataset.lib.IndexedObjectStoreDefinition) KeyValueTableDefinition(co.cask.cdap.api.dataset.lib.KeyValueTableDefinition) TimeseriesTableDefinition(co.cask.cdap.api.dataset.lib.TimeseriesTableDefinition)
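Because CoreDatasetsModule registers each definition under both a short alias ("timeseriesTable") and the fully qualified class name, an application can declare a TimeseriesTable instance by class. Below is a minimal sketch of such a declaration; createDataset on AbstractApplication is the real CDAP API, while the names MyApp and "myTimeseries" are illustrative:

import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.TimeseriesTable;

public class MyApp extends AbstractApplication {
    @Override
    public void configure() {
        setName("MyApp");
        // Resolved through the TimeseriesTableDefinition registered above,
        // because that definition is keyed by TimeseriesTable.class.getName().
        createDataset("myTimeseries", TimeseriesTable.class, DatasetProperties.EMPTY);
    }
}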

Example 2 with TimeseriesTable

Use of co.cask.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

Class MapReduceProgramRunnerTest, method testSuccess:

private void testSuccess(boolean frequentFlushing) throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // we need to start a tx context and do a "get" on all datasets so that they are in the datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable table = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
    final Table counters = datasetCache.getDataset("counters");
    final Table countersFromContext = datasetCache.getDataset("countersFromContext");
    // 1) fill test data
    fillTestInputData(txExecutorFactory, table, false);
    // 2) run job
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
    final long stop = System.currentTimeMillis();
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Map<String, Long> expected = Maps.newHashMap();
            // note: not all records add to the sums, since a filter by tag and by the ts={1..3} time range is used
            expected.put("tag1", 18L);
            expected.put("tag2", 3L);
            expected.put("tag3", 18L);
            Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
            int count = 0;
            while (agg.hasNext()) {
                TimeseriesTable.Entry entry = agg.next();
                String tag = Bytes.toString(entry.getTags()[0]);
                Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
                count++;
            }
            Assert.assertEquals(expected.size(), count);
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
            Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
        }
    });
    datasetCache.dismissTransactionContext();
// TODO: verify metrics. Will be possible after the refactor for CDAP-765
}
Also used: TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(co.cask.cdap.api.dataset.table.Get) Iterator(java.util.Iterator) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
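The helper fillTestInputData used in step 1 is not shown above. As a rough sketch of how such input could be written, assuming hypothetical values and tags (TimeseriesTable.write and TimeseriesTable.Entry are the real CDAP API; the writes would have to run inside a transaction, just like the verification step):

private void writeTestEntry(TimeseriesTable table, long ts) {
    // An Entry carries a row key, a value, a timestamp, and zero or more tags.
    // The verification above reads this key back with table.read(key, start, stop)
    // and sums the values per tag.
    table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS, Bytes.toBytes(3L), ts,
                                          Bytes.toBytes("tag1"), Bytes.toBytes("tag2"), Bytes.toBytes("tag3")));
}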

Example 3 with TimeseriesTable

Use of co.cask.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

Class MapReduceProgramRunnerTest, method testFailure:

// TODO: this tests failure in Map tasks. We also need to test: failure in Reduce tasks, and a job killed by the user.
private void testFailure(boolean frequentFlushing) throws Exception {
    // We want to verify that when a mapreduce job fails:
    // * things written in initialize() remain and are visible to others
    // * things written in tasks are not visible to others TODO AAA: do invalidate
    // * things written in onFinish() remain and are visible to others
    // NOTE: the code of this test is similar to the testTimeSeriesRecordsCount() test. We intentionally put some
    //       "bad data" here to be recognized by map tasks as a signal to emulate failure
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // we need to start a tx context and do a "get" on all datasets so that they are in the datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable table = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
    final Table counters = datasetCache.getDataset("counters");
    final Table countersFromContext = datasetCache.getDataset("countersFromContext");
    // 1) fill test data
    fillTestInputData(txExecutorFactory, table, true);
    // 2) run job
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, false);
    final long stop = System.currentTimeMillis();
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // data should be rolled back. TODO: test that partially written data is rolled back too
            Assert.assertFalse(table.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());
            // but data written in beforeSubmit and onFinish is available to others
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
            Assert.assertEquals(0, counters.get(new Get("mapper")).getLong("records", 0));
            Assert.assertEquals(0, counters.get(new Get("reducer")).getLong("records", 0));
            Assert.assertEquals(0, countersFromContext.get(new Get("mapper")).getLong("records", 0));
            Assert.assertEquals(0, countersFromContext.get(new Get("reducer")).getLong("records", 0));
        }
    });
    datasetCache.dismissTransactionContext();
}
Also used: TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(co.cask.cdap.api.dataset.table.Get) TransactionExecutor(org.apache.tephra.TransactionExecutor)
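The "bad data" mentioned in the comments is a marker record that map tasks recognize as an instruction to fail. A minimal sketch of that technique, with a hypothetical marker value and mapper class (the actual AppWithMapReduce mapper is not shown here):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FailOnMarkerMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
        if ("bad_data".equals(value.toString())) {
            // Throwing from map() fails the task; once retries are exhausted the job
            // fails, and everything written by tasks is rolled back, as asserted above.
            throw new IOException("emulating task failure on marker record");
        }
        context.write(value, new LongWritable(1L));
    }
}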

Example 4 with TimeseriesTable

Use of co.cask.cdap.api.dataset.lib.TimeseriesTable in project cdap by caskdata.

Class SparkStreamingTestRun, method test:

@Test
public void test() throws Exception {
    File checkpointDir = TEMP_FOLDER.newFolder();
    KafkaPublisher publisher = KAFKA_TESTER.getKafkaClient().getPublisher(KafkaPublisher.Ack.LEADER_RECEIVED, Compression.NONE);
    ApplicationManager appManager = deployApplication(TestSparkApp.class);
    Map<String, String> args = ImmutableMap.of("checkpoint.path", checkpointDir.getAbsolutePath(), "kafka.brokers", KAFKA_TESTER.getBrokerService().getBrokerList(), "kafka.topics", "testtopic", "result.dataset", "TimeSeriesResult");
    SparkManager manager = appManager.getSparkManager(KafkaSparkStreaming.class.getSimpleName());
    manager.start(args);
    // Send 100 messages over 5 seconds
    for (int i = 0; i < 100; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // Sum up everything from the TimeseriesTable. The word "Message" should have a count of 100, while each number
    // (0-99) should have a count of 1
    final DataSetManager<TimeseriesTable> tsTableManager = getDataset("TimeSeriesResult");
    final TimeseriesTable tsTable = tsTableManager.get();
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            tsTableManager.flush();
            return getCounts("Message", tsTable);
        }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    for (int i = 0; i < 100; i++) {
        final int finalI = i;
        Tasks.waitFor(1L, new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                tsTableManager.flush();
                return getCounts(Integer.toString(finalI), tsTable);
            }
        }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    }
    manager.stop();
    manager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
    // Send 100 more messages without pause
    for (int i = 100; i < 200; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
    }
    // Start the streaming app again. It should resume from where it left off because of the checkpoint
    manager.start(args);
    // Expects "Message" having count = 200.
    Tasks.waitFor(100L, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            tsTableManager.flush();
            return getCounts("Message", tsTable);
        }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    // Expect each number (0-199) to have a count of 1
    for (int i = 0; i < 200; i++) {
        final int finalI = i;
        Tasks.waitFor(1L, new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                tsTableManager.flush();
                return getCounts(Integer.toString(finalI), tsTable);
            }
        }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
    }
    manager.stop();
    manager.waitForRuns(ProgramRunStatus.KILLED, 2, 60, TimeUnit.SECONDS);
}
Also used: ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) KafkaSparkStreaming(co.cask.cdap.spark.app.KafkaSparkStreaming) File(java.io.File) KafkaPublisher(org.apache.twill.kafka.client.KafkaPublisher) Test(org.junit.Test)
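The helper getCounts is not shown above. A plausible sketch, under the assumption that the streaming job writes each word as a row key with a long count per micro-batch (table.read, Entry.getValue, and Bytes are the same CDAP APIs used in the earlier examples; the aggregation logic itself is an assumption):

private long getCounts(String word, TimeseriesTable tsTable) {
    long result = 0;
    // Scan all entries for this key over the full time range and sum their values.
    Iterator<TimeseriesTable.Entry> itor = tsTable.read(Bytes.toBytes(word), 0, Long.MAX_VALUE);
    while (itor.hasNext()) {
        result += Bytes.toLong(itor.next().getValue());
    }
    return result;
}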

Aggregations

TimeseriesTable (co.cask.cdap.api.dataset.lib.TimeseriesTable): 4 usages
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 3 usages
Table (co.cask.cdap.api.dataset.table.Table): 3 usages
Get (co.cask.cdap.api.dataset.table.Get): 2 usages
ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 2 usages
TransactionExecutor (org.apache.tephra.TransactionExecutor): 2 usages
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 1 usage
CounterTimeseriesTable (co.cask.cdap.api.dataset.lib.CounterTimeseriesTable): 1 usage
CounterTimeseriesTableDefinition (co.cask.cdap.api.dataset.lib.CounterTimeseriesTableDefinition): 1 usage
IndexedObjectStore (co.cask.cdap.api.dataset.lib.IndexedObjectStore): 1 usage
IndexedObjectStoreDefinition (co.cask.cdap.api.dataset.lib.IndexedObjectStoreDefinition): 1 usage
IndexedTable (co.cask.cdap.api.dataset.lib.IndexedTable): 1 usage
IndexedTableDefinition (co.cask.cdap.api.dataset.lib.IndexedTableDefinition): 1 usage
KeyValueTableDefinition (co.cask.cdap.api.dataset.lib.KeyValueTableDefinition): 1 usage
ObjectStore (co.cask.cdap.api.dataset.lib.ObjectStore): 1 usage
TimeseriesTableDefinition (co.cask.cdap.api.dataset.lib.TimeseriesTableDefinition): 1 usage
InMemoryTableDefinition (co.cask.cdap.data2.dataset2.lib.table.inmemory.InMemoryTableDefinition): 1 usage
KafkaSparkStreaming (co.cask.cdap.spark.app.KafkaSparkStreaming): 1 usage
ApplicationManager (co.cask.cdap.test.ApplicationManager): 1 usage
SparkManager (co.cask.cdap.test.SparkManager): 1 usage