
Example 36 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testWordCount.

@Test
public void testWordCount() throws Exception {
    // deploy to namespace default by default
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    final String inputPath = createInput();
    final File outputDir = new File(TEMP_FOLDER.newFolder(), "output");
    try {
        datasetCache.getDataset("someOtherNameSpace", "jobConfig");
        Assert.fail("getDataset() should throw an exception when accessing a non-existing dataset.");
    } catch (DatasetInstantiationException e) {
    // expected
    }
    // Should work if explicitly specify the default namespace
    final KeyValueTable jobConfigTable = datasetCache.getDataset(NamespaceId.DEFAULT.getNamespace(), "jobConfig");
    // write config into dataset
    Transactions.createTransactionExecutor(txExecutorFactory, jobConfigTable).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            jobConfigTable.write(Bytes.toBytes("inputPath"), Bytes.toBytes(inputPath));
            jobConfigTable.write(Bytes.toBytes("outputPath"), Bytes.toBytes(outputDir.getPath()));
        }
    });
    runProgram(app, AppWithMapReduce.ClassicWordCount.class, false, true);
    Assert.assertEquals("true", System.getProperty("partitioner.initialize"));
    Assert.assertEquals("true", System.getProperty("partitioner.destroy"));
    Assert.assertEquals("true", System.getProperty("partitioner.set.conf"));
    Assert.assertEquals("true", System.getProperty("comparator.initialize"));
    Assert.assertEquals("true", System.getProperty("comparator.destroy"));
    Assert.assertEquals("true", System.getProperty("comparator.set.conf"));
    File[] outputFiles = outputDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.startsWith("part-r-") && !name.endsWith(".crc");
        }
    });
    Assert.assertNotNull("no output files found", outputFiles);
    int lines = 0;
    for (File file : outputFiles) {
        lines += Files.readLines(file, Charsets.UTF_8).size();
    }
    // sanity check that the output is not empty
    Assert.assertTrue(lines > 0);
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) File(java.io.File) FilenameFilter(java.io.FilenameFilter) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) DatasetInstantiationException(co.cask.cdap.api.data.DatasetInstantiationException) Test(org.junit.Test)
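
The test above only writes the input and output paths into the jobConfig table; the MapReduce program is expected to read them back before submitting the Hadoop job. A minimal sketch of what that initialize() side could look like, assuming a standard AbstractMapReduce program (the class and wiring below are illustrative, not the actual AppWithMapReduce.ClassicWordCount source):

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative program: reads the paths that the test wrote into the "jobConfig"
// KeyValueTable and wires them into the Hadoop job before it is submitted.
public class WordCountSketch extends AbstractMapReduce {

    @Override
    public void initialize() throws Exception {
        MapReduceContext context = getContext();
        KeyValueTable jobConfig = context.getDataset("jobConfig");
        String inputPath = Bytes.toString(jobConfig.read(Bytes.toBytes("inputPath")));
        String outputPath = Bytes.toString(jobConfig.read(Bytes.toBytes("outputPath")));

        Job job = context.getHadoopJob();
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    }
}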

Example 37 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testSuccess.

private void testSuccess(boolean frequentFlushing) throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable table = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
    final Table counters = datasetCache.getDataset("counters");
    final Table countersFromContext = datasetCache.getDataset("countersFromContext");
    // 1) fill test data
    fillTestInputData(txExecutorFactory, table, false);
    // 2) run job
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
    final long stop = System.currentTimeMillis();
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            Map<String, Long> expected = Maps.newHashMap();
            // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
            expected.put("tag1", 18L);
            expected.put("tag2", 3L);
            expected.put("tag3", 18L);
            Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
            int count = 0;
            while (agg.hasNext()) {
                TimeseriesTable.Entry entry = agg.next();
                String tag = Bytes.toString(entry.getTags()[0]);
                Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
                count++;
            }
            Assert.assertEquals(expected.size(), count);
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
            Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
            Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
        }
    });
    datasetCache.dismissTransactionContext();
// todo: verify metrics. Will be possible after refactor for CDAP-765
}
Also used : TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) Get(co.cask.cdap.api.dataset.table.Get) Iterator(java.util.Iterator) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
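
fillTestInputData() is not shown here; as a rough sketch, writing tagged entries into the "timeSeries" dataset inside a transaction could look like the following (the values and tags are illustrative and do not reproduce the actual test fixture):

// Illustrative only: writes a few tagged entries into the TimeseriesTable inside a
// transaction, mirroring the shape of data that the aggregation job consumes above.
Transactions.createTransactionExecutor(txExecutorFactory, table).execute(new TransactionExecutor.Subroutine() {

    @Override
    public void apply() {
        long ts = System.currentTimeMillis();
        // Each entry carries a counter key, a value, a timestamp, and one or more tags.
        table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS, Bytes.toBytes(3L), ts, Bytes.toBytes("tag1")));
        table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS, Bytes.toBytes(5L), ts, Bytes.toBytes("tag2"), Bytes.toBytes("tag3")));
    }
});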

Example 38 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testFailureInOutputCommitter.

@Test
public void testFailureInOutputCommitter() throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
    // We want to verify that when a MapReduce job fails while committing its dataset outputs,
    // the destroy() method is still called and its writes are committed.
    // (1) setup the datasets we use
    datasetCache.newTransactionContext();
    final KeyValueTable kvTable = datasetCache.getDataset("recorder");
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // seed the table so that it does not start out with initialized=true
            kvTable.write("initialized", "false");
        }
    });
    // 2) run job
    runProgram(app, AppWithMapReduce.MapReduceWithFailingOutputCommitter.class, new HashMap<String, String>(), false);
    // 3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // the destroy() method should have recorded FAILED status in the kv table
            Assert.assertEquals(ProgramStatus.FAILED.name(), Bytes.toString(kvTable.read("status")));
        }
    });
    datasetCache.dismissTransactionContext();
}
Also used : ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) TransactionExecutor(org.apache.tephra.TransactionExecutor) Test(org.junit.Test)
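
For context, a hedged sketch of the destroy() side that the final assertion exercises: a MapReduce program that records its final status into the "recorder" KeyValueTable. This is illustrative, not the actual AppWithMapReduce.MapReduceWithFailingOutputCommitter source, and it assumes the final state is exposed via getContext().getState():

import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;

// Illustrative sketch: destroy() runs even when the output committer fails, so it can
// record the final program status for the test to read back.
public class RecordingMapReduceSketch extends AbstractMapReduce {

    @Override
    public void destroy() {
        KeyValueTable recorder = getContext().getDataset("recorder");
        // Writes e.g. "FAILED" under the "status" key, which the test asserts on.
        recorder.write("status", getContext().getState().getStatus().name());
    }
}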

Example 39 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class FakeDatasetDefinition method getDataset.

@Override
public FakeDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    DatasetSpecification kvTableSpec = spec.getSpecification("objects");
    KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
    return new FakeDataset(spec.getName(), table);
}
Also used : KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification)

Example 40 with KeyValueTable

use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class StandaloneDatasetDefinition method getDataset.

@Override
public StandaloneDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    DatasetSpecification kvTableSpec = spec.getSpecification("objects");
    KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
    return new StandaloneDataset(spec.getName(), table);
}
Also used : KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) DatasetSpecification(co.cask.cdap.api.dataset.DatasetSpecification)
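
Both definitions above resolve their embedded table with spec.getSpecification("objects"). For reference, a simplified sketch of the configure() side that embeds that table in the first place (an outline of the usual composite-dataset pattern, not the exact FakeDatasetDefinition or StandaloneDatasetDefinition code):

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
    // Embed a KeyValueTable specification under the name "objects" so that getDataset()
    // can later resolve it with spec.getSpecification("objects").
    return DatasetSpecification.builder(instanceName, getName())
        .properties(properties.getProperties())
        .datasets(tableDef.configure("objects", properties))
        .build();
}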

Aggregations

KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable) 84
Test (org.junit.Test) 49
ApplicationManager (co.cask.cdap.test.ApplicationManager) 45
SparkManager (co.cask.cdap.test.SparkManager) 25
StreamManager (co.cask.cdap.test.StreamManager) 16
IOException (java.io.IOException) 16
TransactionExecutor (org.apache.tephra.TransactionExecutor) 12
ApplicationWithPrograms (co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) 11
HashMap (java.util.HashMap) 11
ArrayList (java.util.ArrayList) 10
FileSet (co.cask.cdap.api.dataset.lib.FileSet) 8
KeyValue (co.cask.cdap.api.dataset.lib.KeyValue) 8
Table (co.cask.cdap.api.dataset.table.Table) 8
NamespaceMeta (co.cask.cdap.proto.NamespaceMeta) 8
ObjectStore (co.cask.cdap.api.dataset.lib.ObjectStore) 7
MapReduceManager (co.cask.cdap.test.MapReduceManager) 7
ServiceManager (co.cask.cdap.test.ServiceManager) 7
WorkflowManager (co.cask.cdap.test.WorkflowManager) 7
Set (java.util.Set) 7
Category (org.junit.experimental.categories.Category) 7