Use of co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
The class MapReduceProgramRunnerTest, method testFailureInInit.
@Test
public void testFailureInInit() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  testFailureInInit("true", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.<String, String>of());
  testFailureInInit("false", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.of("failInput", "true"));
  testFailureInInit("false", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.of("failOutput", "true"));
  testFailureInInit("true", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.<String, String>of());
  testFailureInInit("false", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.of("failInput", "true"));
  testFailureInInit("false", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.of("failOutput", "true"));
}
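The calls above delegate to a private four-argument overload of testFailureInInit that is not part of this listing. The sketch below is purely illustrative and only shows the general shape such a helper could have, given what the calls reveal: run the MapReduce with the supplied arguments, expect the run to fail, and compare some recorded outcome against the first argument. The dataset name "recorder" and the key "outcome" are invented for the sketch, imports are omitted as in the listings above, and the real helper's checks may differ.

// Purely illustrative sketch; "recorder"/"outcome" are invented names, and the real
// helper in MapReduceProgramRunnerTest may check something entirely different.
private void testFailureInInit(final String expected, ApplicationWithPrograms app,
                               Class<?> programClass, Map<String, String> args) throws Exception {
  // the MapReduce is expected to fail, either directly in initialize() or while
  // instantiating the input/output that initialize() configured
  Assert.assertFalse(runProgram(app, programClass, new BasicArguments(args)));
  // compare whatever the program recorded against the expected value
  final KeyValueTable recorder = datasetCache.getDataset("recorder");
  Transactions.createTransactionExecutor(txExecutorFactory, recorder).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Assert.assertEquals(expected, Bytes.toString(recorder.read(Bytes.toBytes("outcome"))));
    }
  });
}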
Use of co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
The class MapReduceProgramRunnerTest, method testMapreduceWithObjectStore.
@Test
public void testMapreduceWithObjectStore() throws Exception {
  // Deploy the app to another namespace and test cross-namespace access along the way
  final ApplicationWithPrograms app = deployApp(new NamespaceId("someOtherNameSpace").toId(), AppWithMapReduceUsingObjectStore.class);
  final ObjectStore<String> input = datasetCache.getDataset("someOtherNameSpace", "keys");
  // Getting a dataset from a non-existing namespace must fail
  try {
    datasetCache.getDataset("nonExistingNameSpace", "keys");
    Assert.fail("getDataset() should throw an exception when accessing a dataset from a non-existing namespace.");
  } catch (DatasetInstantiationException e) {
    // expected
  }
  final String testString = "persisted data";
  // Populate some input
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) input).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      input.write(Bytes.toBytes(testString), testString);
      input.write(Bytes.toBytes("distributed systems"), "distributed systems");
    }
  });
  runProgram(app, AppWithMapReduceUsingObjectStore.ComputeCounts.class, false, true);
  final KeyValueTable output = datasetCache.getDataset("someOtherNameSpace", "count");
  // Read the output and verify the results
  Transactions.createTransactionExecutor(txExecutorFactory, output).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      byte[] val = output.read(Bytes.toBytes(testString));
      Assert.assertNotNull(val);
      Assert.assertEquals(Integer.toString(testString.length()), Bytes.toString(val));
      val = output.read(Bytes.toBytes("distributed systems"));
      Assert.assertNotNull(val);
      Assert.assertEquals("19", Bytes.toString(val));
    }
  });
}
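The assertions above imply what ComputeCounts produces: for each entry in the "keys" ObjectStore, the "count" KeyValueTable ends up holding the length of the stored string under the same key ("persisted data" has 14 characters, "distributed systems" has 19). The actual job is not shown here; the mapper below is only an illustrative sketch of logic that would satisfy those assertions (class name and type parameters are assumptions, imports omitted as in the listings above).

// Illustrative only, not the actual AppWithMapReduceUsingObjectStore.ComputeCounts.
// Reads (key, value) pairs from the ObjectStore and writes (key, length-of-value)
// to the KeyValueTable used as output.
public static class CountLengthMapper extends Mapper<byte[], String, byte[], byte[]> {
  @Override
  protected void map(byte[] key, String value, Context context) throws IOException, InterruptedException {
    context.write(key, Bytes.toBytes(Integer.toString(value.length())));
  }
}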
Use of co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
The class MapReduceWithMultipleInputsTest, method testSimpleJoin.
@Test
public void testSimpleJoin() throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleInputs.class);
  final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.PURCHASES);
  Location inputFile = fileSet.getBaseLocation().append("inputFile");
  inputFile.createNew();
  PrintWriter writer = new PrintWriter(inputFile.getOutputStream());
  // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend>
  writer.println("1 20");
  writer.println("1 25");
  writer.println("1 30");
  writer.println("2 5");
  writer.close();
  // write some of the purchases to the stream
  writeToStream(AppWithMapReduceUsingMultipleInputs.PURCHASES, "2 13");
  writeToStream(AppWithMapReduceUsingMultipleInputs.PURCHASES, "3 60");
  FileSet fileSet2 = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.CUSTOMERS);
  inputFile = fileSet2.getBaseLocation().append("inputFile");
  inputFile.createNew();
  // the CUSTOMERS dataset consists of records in the format: <customerId> <customerName>
  writer = new PrintWriter(inputFile.getOutputStream());
  writer.println("1 Bob");
  writer.println("2 Samuel");
  writer.println("3 Joe");
  writer.close();
  // Using multiple inputs, this MapReduce will join on the two above datasets to get aggregate results.
  // The records are expected to be in the form: <customerId> <customerName> <totalSpend>
  runProgram(app, AppWithMapReduceUsingMultipleInputs.ComputeSum.class, new BasicArguments());
  FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.OUTPUT_DATASET);
  // will only be 1 part file, due to the small amount of data
  Location outputLocation = outputFileSet.getBaseLocation().append("output").append("part-r-00000");
  List<String> lines = CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(outputLocation), Charsets.UTF_8));
  Assert.assertEquals(ImmutableList.of("1 Bob 75", "2 Samuel 18", "3 Joe 60"), lines);
  // assert that the mapper was initialized and destroyed (this doesn't happen when using hadoop's MultipleOutputs).
  Assert.assertEquals("true", System.getProperty("mapper.initialized"));
  Assert.assertEquals("true", System.getProperty("mapper.destroyed"));
}
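This test exercises a classic reduce-side join: purchases (from the file and the stream) and customers are both keyed by customerId, and the output sums each customer's spend next to the customer's name (1 Bob 75, 2 Samuel 18, 3 Joe 60). The actual ComputeSum implementation is not shown; the reducer below is a rough sketch of such a join (the "N:"/"S:" value tagging by the mappers is an assumption made for the sketch, imports omitted as in the listings above).

// Illustrative reduce-side join, not the actual AppWithMapReduceUsingMultipleInputs.ComputeSum.
// Mappers are assumed to tag each value, e.g. "N:<name>" for customers and "S:<spend>" for purchases.
public static class JoinReducer extends Reducer<Text, Text, NullWritable, Text> {
  @Override
  protected void reduce(Text customerId, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
    String name = null;
    long totalSpend = 0;
    for (Text value : values) {
      String v = value.toString();
      if (v.startsWith("N:")) {
        name = v.substring(2);
      } else if (v.startsWith("S:")) {
        totalSpend += Long.parseLong(v.substring(2));
      }
    }
    // output record format: <customerId> <customerName> <totalSpend>
    context.write(NullWritable.get(), new Text(customerId + " " + name + " " + totalSpend));
  }
}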
Use of co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
The class MapReduceWithPartitionedTest, method testTimePartitionedWithMR.
@Test
public void testTimePartitionedWithMR() throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithTimePartitionedFileSet.class);
  // write a value to the input table
  final Table table = datasetCache.getDataset(AppWithTimePartitionedFileSet.INPUT);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      table.put(Bytes.toBytes("x"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
    }
  });
  final long time = DATE_FORMAT.parse("1/15/15 11:15 am").getTime();
  final long time5 = time + TimeUnit.MINUTES.toMillis(5);
  // run the partition writer m/r with this output partition time
  Map<String, String> runtimeArguments = Maps.newHashMap();
  Map<String, String> outputArgs = Maps.newHashMap();
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time);
  final ImmutableMap<String, String> assignedMetadata =
    ImmutableMap.of("region", "13", "data.source.name", "input", "data.source.type", "table");
  TimePartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, assignedMetadata);
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
  // this should have created a partition in the tpfs
  final TimePartitionedFileSet tpfs = datasetCache.getDataset(TIME_PARTITIONED);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      TimePartitionDetail partition = tpfs.getPartitionByTime(time);
      Assert.assertNotNull(partition);
      String path = partition.getRelativePath();
      Assert.assertNotNull(path);
      Assert.assertTrue(path.contains("2015-01-15/11-15"));
      Assert.assertEquals(assignedMetadata, partition.getMetadata().asMap());
    }
  });
  // delete the data in the input table and write a new row
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      table.delete(Bytes.toBytes("x"));
      table.put(Bytes.toBytes("y"), AppWithTimePartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
    }
  });
  // now run the m/r again with a new partition time, say 5 minutes later
  TimePartitionedFileSetArguments.setOutputPartitionTime(outputArgs, time5);
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, outputArgs));
  // make the mapreduce add the partition in destroy, to validate that this does not fail the job
  runtimeArguments.put(AppWithTimePartitionedFileSet.COMPAT_ADD_PARTITION, "true");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
  // this should have created a partition in the tpfs
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) tpfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Partition partition = tpfs.getPartitionByTime(time5);
      Assert.assertNotNull(partition);
      String path = partition.getRelativePath();
      Assert.assertNotNull(path);
      Assert.assertTrue(path.contains("2015-01-15/11-20"));
    }
  });
  // now run a map/reduce that reads all the partitions
  runtimeArguments = Maps.newHashMap();
  Map<String, String> inputArgs = Maps.newHashMap();
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time5 + TimeUnit.MINUTES.toMillis(5));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "a");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read both partitions - and written both x and y to row a
  final Table output = datasetCache.getDataset(AppWithTimePartitionedFileSet.OUTPUT);
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Row row = output.get(Bytes.toBytes("a"));
      Assert.assertEquals("1", row.getString("x"));
      Assert.assertEquals("2", row.getString("y"));
    }
  });
  // now run a map/reduce that reads a range of the partitions, namely the first one
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(5));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time + TimeUnit.MINUTES.toMillis(2));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "b");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read the first partition only - and written only x to row b
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Row row = output.get(Bytes.toBytes("b"));
      Assert.assertEquals("1", row.getString("x"));
      Assert.assertNull(row.get("y"));
    }
  });
  // now run a map/reduce that reads no partitions (because the range matches nothing)
  TimePartitionedFileSetArguments.setInputStartTime(inputArgs, time - TimeUnit.MINUTES.toMillis(10));
  TimePartitionedFileSetArguments.setInputEndTime(inputArgs, time - TimeUnit.MINUTES.toMillis(9));
  runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, TIME_PARTITIONED, inputArgs));
  runtimeArguments.put(AppWithTimePartitionedFileSet.ROW_TO_WRITE, "n");
  Assert.assertTrue(runProgram(app, AppWithTimePartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
  // this should have read no partitions - and written nothing to row n
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Row row = output.get(Bytes.toBytes("n"));
      Assert.assertTrue(row.isEmpty());
    }
  });
}
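The two path assertions above ("2015-01-15/11-15" and "2015-01-15/11-20") simply reflect the partition times of 11:15 and 11:20 on January 15, 2015. If you prefer to derive the expected fragment from the timestamp instead of hard-coding it, a small helper like the one below would do; the "yyyy-MM-dd/HH-mm" pattern is inferred from these assertions, not taken from the TimePartitionedFileSet implementation, so treat it as an assumption.

// Derives the expected relative-path fragment from a partition timestamp.
// The pattern is inferred from the assertions above; it is an assumption about the layout.
private static String expectedPathFragment(long partitionTime) {
  java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat("yyyy-MM-dd/HH-mm");
  return fmt.format(new java.util.Date(partitionTime));
}
// usage in the test, e.g.: Assert.assertTrue(path.contains(expectedPathFragment(time)));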
Use of co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
The class MapReduceProgramRunnerTest, method testSuccess.
private void testSuccess(boolean frequentFlushing) throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");
  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, false);
  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
  final long stop = System.currentTimeMillis();
  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      Map<String, Long> expected = Maps.newHashMap();
      // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
      expected.put("tag1", 18L);
      expected.put("tag2", 3L);
      expected.put("tag3", 18L);
      Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
      int count = 0;
      while (agg.hasNext()) {
        TimeseriesTable.Entry entry = agg.next();
        String tag = Bytes.toString(entry.getTags()[0]);
        Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
        count++;
      }
      Assert.assertEquals(expected.size(), count);
      Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
      Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
      Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
      Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
      Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
      Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
    }
  });
  datasetCache.dismissTransactionContext();
  // todo: verify metrics. Will be possible after refactor for CDAP-765
}
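One detail worth calling out: the verification step passes datasetCache.getTransactionAwares() to the executor, so every dataset fetched from the cache participates in a single transaction, which is why the method opens a transaction context at the start and dismisses it at the end. Stripped down to just that pattern, a minimal sketch (the dataset name and key below are placeholders, not from the test) looks like this:

// Minimal sketch of the "cache, then verify in one transaction" pattern used above.
// "someTable" and "some-key" are placeholders.
datasetCache.newTransactionContext();
final KeyValueTable someTable = datasetCache.getDataset("someTable");
Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(new TransactionExecutor.Subroutine() {
  @Override
  public void apply() {
    // all reads here see a consistent snapshot across every dataset in the cache
    Assert.assertNotNull(someTable.read(Bytes.toBytes("some-key")));
  }
});
datasetCache.dismissTransactionContext();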