use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testFailureInOutputCommitter.
/**
 * Verifies that when a MapReduce program fails while committing its dataset outputs,
 * the status written by its destroy() method is still visible afterwards, i.e. the
 * destroy() call happened and its writes were committed.
 *
 * <p>The test opens a transaction context on the dataset cache; that context is always
 * dismissed in a {@code finally} block so that a failing run or assertion cannot leave it
 * behind for subsequent tests.
 *
 * @throws Exception if deployment, the program run, or one of the transactions fails
 */
@Test
public void testFailureInOutputCommitter() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

  // (1) set up the dataset we use, within a fresh transaction context
  datasetCache.newTransactionContext();
  try {
    final KeyValueTable kvTable = datasetCache.getDataset("recorder");
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
      .execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() {
          // the table should not have initialized=true
          kvTable.write("initialized", "false");
        }
      });

    // (2) run the job
    runProgram(app, AppWithMapReduce.MapReduceWithFailingOutputCommitter.class,
               new HashMap<String, String>(), false);

    // (3) verify results
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
      .execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() {
          // the destroy() method should have recorded FAILED status in the kv table
          Assert.assertEquals(ProgramStatus.FAILED.name(), Bytes.toString(kvTable.read("status")));
        }
      });
  } finally {
    // always release the transaction context, even if the run or an assertion above failed
    datasetCache.dismissTransactionContext();
  }
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testWordCount.
/**
 * Runs {@code AppWithMapReduce.ClassicWordCount} with its input and output paths supplied
 * through the "jobConfig" dataset, then checks the partitioner/comparator system properties
 * and that the job produced non-empty reducer output files.
 *
 * <p>Along the way it checks that looking up "jobConfig" in the namespace
 * "someOtherNameSpace" fails, while looking it up in the default namespace succeeds.
 *
 * @throws Exception if deployment, the program run, or file access fails
 */
@Test
public void testWordCount() throws Exception {
  // deployApp() without an explicit namespace deploys to the default namespace
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  final String inputLocation = createInput();
  final File outputFolder = new File(TEMP_FOLDER.newFolder(), "output");

  // The dataset must not be reachable through a namespace it does not live in
  boolean lookupRejected = false;
  try {
    datasetCache.getDataset("someOtherNameSpace", "jobConfig");
  } catch (DatasetInstantiationException expected) {
    lookupRejected = true;
  }
  if (!lookupRejected) {
    Assert.fail("getDataset() should throw an exception when accessing a non-existing dataset.");
  }

  // Naming the default namespace explicitly should work
  final KeyValueTable configTable = datasetCache.getDataset(NamespaceId.DEFAULT.getNamespace(), "jobConfig");

  // hand the input and output locations to the job through the dataset
  TransactionExecutor.Subroutine writeConfig = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      configTable.write(Bytes.toBytes("inputPath"), Bytes.toBytes(inputLocation));
      configTable.write(Bytes.toBytes("outputPath"), Bytes.toBytes(outputFolder.getPath()));
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, configTable).execute(writeConfig);

  runProgram(app, AppWithMapReduce.ClassicWordCount.class, false, true);

  // each of these system properties is expected to be "true" after the run
  String[] lifecycleProperties = {
    "partitioner.initialize", "partitioner.destroy", "partitioner.set.conf",
    "comparator.initialize", "comparator.destroy", "comparator.set.conf"
  };
  for (String property : lifecycleProperties) {
    Assert.assertEquals("true", System.getProperty(property));
  }

  // pick up the reducer output files, skipping the checksum side files
  FilenameFilter reducerOutputFilter = new FilenameFilter() {
    @Override
    public boolean accept(File dir, String name) {
      if (name.endsWith(".crc")) {
        return false;
      }
      return name.startsWith("part-r-");
    }
  };
  File[] outputFiles = outputFolder.listFiles(reducerOutputFilter);
  Assert.assertNotNull("no output files found", outputFiles);

  int lines = 0;
  for (int i = 0; i < outputFiles.length; i++) {
    lines += Files.readLines(outputFiles[i], Charsets.UTF_8).size();
  }
  // dummy check that the output is not empty
  Assert.assertTrue(lines > 0);
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testMapreduceWithObjectStore.
/**
 * Deploys {@code AppWithMapReduceUsingObjectStore} into the namespace "someOtherNameSpace",
 * writes two strings into its "keys" object store, runs
 * {@code AppWithMapReduceUsingObjectStore.ComputeCounts}, and verifies that the "count" table
 * holds the length of each string (as a decimal string) under the string's bytes as key.
 *
 * <p>It also checks that requesting the dataset from a non-existing namespace fails with a
 * {@code DatasetInstantiationException}.
 *
 * @throws Exception if deployment, the program run, or one of the transactions fails
 */
@Test
public void testMapreduceWithObjectStore() throws Exception {
  // Deploy apps to another namespace and test cross-namespace access meanwhile
  final ApplicationWithPrograms app = deployApp(Id.Namespace.fromEntityId(new NamespaceId("someOtherNameSpace")),
                                                AppWithMapReduceUsingObjectStore.class);
  final ObjectStore<String> input = datasetCache.getDataset("someOtherNameSpace", "keys");

  // Get dataset from a non existing namespace
  try {
    datasetCache.getDataset("nonExistingNameSpace", "keys");
    Assert.fail("getDataset() should throw an exception when accessing dataset from a non-existing namespace.");
  } catch (DatasetInstantiationException e) {
    // expected
  }

  final String testString = "persisted data";

  // Populate some input
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) input)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        input.write(Bytes.toBytes(testString), testString);
        input.write(Bytes.toBytes("distributed systems"), "distributed systems");
      }
    });

  runProgram(app, AppWithMapReduceUsingObjectStore.ComputeCounts.class, false, true);

  final KeyValueTable output = datasetCache.getDataset("someOtherNameSpace", "count");

  // read output and verify result
  Transactions.createTransactionExecutor(txExecutorFactory, output)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // JUnit expects (expected, actual); keeping that order makes failure messages accurate
        byte[] val = output.read(Bytes.toBytes(testString));
        Assert.assertNotNull(val);
        Assert.assertEquals(Integer.toString(testString.length()), Bytes.toString(val));

        val = output.read(Bytes.toBytes("distributed systems"));
        Assert.assertNotNull(val);
        // "distributed systems" is 19 characters long
        Assert.assertEquals("19", Bytes.toString(val));
      }
    });
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testMapReduceWithLocalFiles.
/**
 * Runs {@code AppWithLocalFiles.MapReduceWithLocalFiles} over three short records, passing a
 * stop-words file as a runtime argument, and verifies the word counts in the output table:
 * the words "a", "the" and "an" must be absent, the remaining words must have their counts.
 *
 * @throws Exception if deployment, the program run, or one of the transactions fails
 */
@Test
public void testMapReduceWithLocalFiles() throws Exception {
  ApplicationWithPrograms deployedApp = deployApp(AppWithLocalFiles.class);
  URI stopWordsUri = createStopWordsFile();

  // seed the input table with the records to count
  final KeyValueTable inputTable = datasetCache.getDataset(AppWithLocalFiles.MR_INPUT_DATASET);
  TransactionExecutor.Subroutine seedInput = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      inputTable.write("2324", "a test record");
      inputTable.write("43353", "the test table");
      inputTable.write("34335", "an end record");
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, inputTable).execute(seedInput);

  // runtime arguments: dataset names plus the location of the stop-words file
  BasicArguments runtimeArgs = new BasicArguments(
    ImmutableMap.of(AppWithLocalFiles.MR_INPUT_DATASET, "input",
                    AppWithLocalFiles.MR_OUTPUT_DATASET, "output",
                    AppWithLocalFiles.STOPWORDS_FILE_ARG, stopWordsUri.toString()));
  runProgram(deployedApp, AppWithLocalFiles.MapReduceWithLocalFiles.class, runtimeArgs);

  final KeyValueTable countsTable = datasetCache.getDataset(AppWithLocalFiles.MR_OUTPUT_DATASET);
  TransactionExecutor.Subroutine verifyCounts = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // these words must not appear in the output at all
      for (String excluded : new String[] {"a", "the", "an"}) {
        Assert.assertNull(countsTable.read(excluded));
      }
      // all other words are counted across the three records
      Assert.assertEquals(2, Bytes.toInt(countsTable.read("test")));
      Assert.assertEquals(2, Bytes.toInt(countsTable.read("record")));
      Assert.assertEquals(1, Bytes.toInt(countsTable.read("table")));
      Assert.assertEquals(1, Bytes.toInt(countsTable.read("end")));
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, countsTable).execute(verifyCounts);
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testTransactionHandling.
/**
 * Checks that initialize() and getSplits() run within one and the same transaction and
 * see the same instance of the input dataset.
 *
 * <p>This method only deploys {@code AppWithTxAware} and runs its {@code PedanticMapReduce}
 * program with an "outputPath" runtime argument; it contains no assertions of its own, so the
 * actual checks presumably live inside the program (NOTE(review): confirm in AppWithTxAware).
 *
 * @throws Exception if deployment or the program run fails
 */
@Test
public void testTransactionHandling() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithTxAware.class);

  // the program writes below a temporary folder handed in as a runtime argument
  String outputPath = TEMP_FOLDER_SUPPLIER.get().getPath() + "/output";
  BasicArguments runtimeArgs = new BasicArguments(ImmutableMap.of("outputPath", outputPath));

  runProgram(app, AppWithTxAware.PedanticMapReduce.class, runtimeArgs);
}
Aggregations