Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From class MapReduceProgramRunnerTest, method testWordCount:
@Test
public void testWordCount() throws Exception {
  // deploy to the default namespace by default
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  final String inputPath = createInput();
  final File outputDir = new File(TEMP_FOLDER.newFolder(), "output");
  try {
    datasetCache.getDataset("someOtherNameSpace", "jobConfig");
    Assert.fail("getDataset() should throw an exception when accessing a dataset in a non-existing namespace.");
  } catch (DatasetInstantiationException e) {
    // expected
  }
  // should work if the default namespace is specified explicitly
  final KeyValueTable jobConfigTable = datasetCache.getDataset(NamespaceId.DEFAULT.getNamespace(), "jobConfig");
  // write the job configuration into the dataset
  Transactions.createTransactionExecutor(txExecutorFactory, jobConfigTable).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        jobConfigTable.write(Bytes.toBytes("inputPath"), Bytes.toBytes(inputPath));
        jobConfigTable.write(Bytes.toBytes("outputPath"), Bytes.toBytes(outputDir.getPath()));
      }
    });
  runProgram(app, AppWithMapReduce.ClassicWordCount.class, false, true);
  Assert.assertEquals("true", System.getProperty("partitioner.initialize"));
  Assert.assertEquals("true", System.getProperty("partitioner.destroy"));
  Assert.assertEquals("true", System.getProperty("partitioner.set.conf"));
  Assert.assertEquals("true", System.getProperty("comparator.initialize"));
  Assert.assertEquals("true", System.getProperty("comparator.destroy"));
  Assert.assertEquals("true", System.getProperty("comparator.set.conf"));
  File[] outputFiles = outputDir.listFiles(new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      return name.startsWith("part-r-") && !name.endsWith(".crc");
    }
  });
  Assert.assertNotNull("no output files found", outputFiles);
  int lines = 0;
  for (File file : outputFiles) {
    lines += Files.readLines(file, Charsets.UTF_8).size();
  }
  // sanity check that the output is not empty
  Assert.assertTrue(lines > 0);
}
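For reference, reading the configuration back out of the same KeyValueTable mirrors the write path. A minimal sketch, reusing jobConfigTable, inputPath, and txExecutorFactory from the test above (the assertion is illustrative, not part of the original test):

Transactions.createTransactionExecutor(txExecutorFactory, jobConfigTable).execute(
  new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // read() returns the stored byte[] value, or null if the key is absent
      byte[] storedInputPath = jobConfigTable.read(Bytes.toBytes("inputPath"));
      Assert.assertEquals(inputPath, Bytes.toString(storedInputPath));
    }
  });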
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From class MapReduceProgramRunnerTest, method testSuccess:
private void testSuccess(boolean frequentFlushing) throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  // start a tx context and do a "get" on all datasets so that they are in the datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");
  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, false);
  // 2) run the job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
  final long stop = System.currentTimeMillis();
  // 3) verify the results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Map<String, Long> expected = Maps.newHashMap();
        // note: not all records contribute to the sums, since the job filters by tag and by ts = {1..3}
        expected.put("tag1", 18L);
        expected.put("tag2", 3L);
        expected.put("tag3", 18L);
        Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
        int count = 0;
        while (agg.hasNext()) {
          TimeseriesTable.Entry entry = agg.next();
          String tag = Bytes.toString(entry.getTags()[0]);
          Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
          count++;
        }
        Assert.assertEquals(expected.size(), count);
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
      }
    });
  datasetCache.dismissTransactionContext();
  // todo: verify metrics; possible after the refactoring for CDAP-765
}
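The helper fillTestInputData is not shown here. Writing input rows to a TimeseriesTable inside a transaction would look roughly like the sketch below; the value, timestamp, and tag are illustrative, not the test's actual data:

Transactions.createTransactionExecutor(txExecutorFactory, table).execute(
  new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // each entry carries a key, a value, a timestamp, and optional tags
      table.write(new TimeseriesTable.Entry(AggregateMetricsByTag.BY_TAGS,
                                            Bytes.toBytes(3L), 1L, Bytes.toBytes("tag1")));
    }
  });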
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From class MapReduceProgramRunnerTest, method testFailureInOutputCommitter:
@Test
public void testFailureInOutputCommitter() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  // We want to verify that when a mapreduce fails while committing the dataset outputs,
  // the destroy() method is still called and its writes are committed.
  // 1) set up the datasets we use
  datasetCache.newTransactionContext();
  final KeyValueTable kvTable = datasetCache.getDataset("recorder");
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // seed the table; the job should not leave initialized=true behind
        kvTable.write("initialized", "false");
      }
    });
  // 2) run the job
  runProgram(app, AppWithMapReduce.MapReduceWithFailingOutputCommitter.class, new HashMap<String, String>(), false);
  // 3) verify the results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // the destroy() method should have recorded the FAILED status in the kv table
        Assert.assertEquals(ProgramStatus.FAILED.name(), Bytes.toString(kvTable.read("status")));
      }
    });
  datasetCache.dismissTransactionContext();
}
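This test relies on the program's destroy() method writing the final status into the "recorder" table even though the output commit failed. A rough sketch of such a destroy(), assuming CDAP's AbstractMapReduce lifecycle API; the actual AppWithMapReduce source may differ:

@Override
public void destroy() {
  // getState() reports the final program status; FAILED here, because the output committer threw
  KeyValueTable recorder = getContext().getDataset("recorder");
  recorder.write("status", getContext().getState().getStatus().name());
}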
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From class FakeDatasetDefinition, method getDataset:
@Override
public FakeDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                              Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  // look up the spec of the embedded KeyValueTable and instantiate it
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  return new FakeDataset(spec.getName(), table);
}
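getDataset() can only find the embedded table if the matching configure() embedded a spec under the name "objects". A sketch of what that configure() typically looks like in such a composite definition, assuming the usual DatasetSpecification builder pattern (not the actual FakeDatasetDefinition source):

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
  // embed a KeyValueTable spec under the name "objects", matching the lookup in getDataset()
  return DatasetSpecification.builder(instanceName, getName())
    .properties(properties.getProperties())
    .datasets(tableDef.configure("objects", properties))
    .build();
}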
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From class StandaloneDatasetDefinition, method getDataset:
@Override
public StandaloneDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                    Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  // same pattern as above: resolve the embedded KeyValueTable spec and wrap it
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  return new StandaloneDataset(spec.getName(), table);
}
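The wrapper returned here commonly extends AbstractDataset and delegates to the embedded KeyValueTable. A hedged sketch of such a wrapper (illustrative, with hypothetical put/get helpers; not the actual StandaloneDataset source):

public class StandaloneDataset extends AbstractDataset {
  private final KeyValueTable table;

  public StandaloneDataset(String instanceName, KeyValueTable table) {
    // registering the embedded table makes this dataset participate in its transactions
    super(instanceName, table);
    this.table = table;
  }

  public void put(String key, String value) {
    table.write(key, value);
  }

  public String get(String key) {
    byte[] value = table.read(key);
    return value == null ? null : Bytes.toString(value);
  }
}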