use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testMapreduceWithFile.
private void testMapreduceWithFile(String inputDatasetName, String inputPaths, String outputDatasetName, String outputPath, Class appClass, Class mrClass, Map<String, String> extraRuntimeArgs, @Nullable final String counterTableName, @Nullable final String outputSeparator) throws Exception {
final ApplicationWithPrograms app = deployApp(appClass, new AppWithMapReduceUsingFileSet.AppConfig(inputDatasetName, outputDatasetName));
Map<String, String> runtimeArguments = Maps.newHashMap();
Map<String, String> inputArgs = Maps.newHashMap();
Map<String, String> outputArgs = Maps.newHashMap();
FileSetArguments.setInputPaths(inputArgs, inputPaths);
FileSetArguments.setOutputPath(outputArgs, outputPath);
if (outputSeparator != null) {
outputArgs.put(FileSetProperties.OUTPUT_PROPERTIES_PREFIX + TextOutputFormat.SEPERATOR, "#");
}
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, inputDatasetName, inputArgs));
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, outputDatasetName, outputArgs));
if (extraRuntimeArgs != null) {
runtimeArguments.putAll(extraRuntimeArgs);
}
// clear the counters in case a previous test case left behind some values
if (counterTableName != null) {
Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", () -> {
KeyValueTable counters = datasetCache.getDataset(counterTableName);
counters.delete(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS);
counters.delete(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS);
});
}
// write a handful of numbers to a file; compute their sum, too.
final long[] values = { 15L, 17L, 7L, 3L };
final FileSet input = datasetCache.getDataset(inputDatasetName, inputArgs);
long sum = 0L, count = 1;
long inputRecords = 0;
for (Location inputLocation : input.getInputLocations()) {
final PrintWriter writer = new PrintWriter(inputLocation.getOutputStream());
for (long value : values) {
value *= count;
writer.println(value);
sum += value;
inputRecords++;
}
writer.close();
count++;
}
runProgram(app, mrClass, new BasicArguments(runtimeArguments));
// output location in file system is a directory that contains a part file, a _SUCCESS file, and checksums
// (.<filename>.crc) for these files. Find the actual part file. Its name begins with "part". In this case,
// there should be only one part file (with this small data, we have a single reducer).
final FileSet results = datasetCache.getDataset(outputDatasetName, outputArgs);
Location resultLocation = results.getOutputLocation();
if (resultLocation.isDirectory()) {
for (Location child : resultLocation.list()) {
if (!child.isDirectory() && child.getName().startsWith("part")) {
resultLocation = child;
break;
}
}
}
Assert.assertFalse(resultLocation.isDirectory());
// read output and verify result
String line = CharStreams.readFirstLine(CharStreams.newReaderSupplier(Locations.newInputSupplier(resultLocation), Charsets.UTF_8));
Assert.assertNotNull(line);
String[] fields = line.split(outputSeparator == null ? ":" : outputSeparator);
Assert.assertEquals(2, fields.length);
Assert.assertEquals(AppWithMapReduceUsingFileSet.FileMapper.ONLY_KEY, fields[0]);
Assert.assertEquals(sum, Long.parseLong(fields[1]));
if (counterTableName != null) {
final long totalInputRecords = inputRecords;
Transactions.execute(datasetCache.newTransactionContext(), "countersVerify", () -> {
KeyValueTable counters = datasetCache.getDataset(counterTableName);
Assert.assertEquals(totalInputRecords, counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.INPUT_RECORDS, 0L));
Assert.assertEquals(1L, counters.incrementAndGet(AppWithMapReduceUsingRuntimeDatasets.REDUCE_KEYS, 0L));
});
}
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceProgramRunnerTest method testFailureInInit.
@Test
public void testFailureInInit() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
testFailureInInit("true", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.<String, String>of());
testFailureInInit("false", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.of("failInput", "true"));
testFailureInInit("false", app, AppWithMapReduce.FaiiingMR.class, ImmutableMap.of("failOutput", "true"));
testFailureInInit("true", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.<String, String>of());
testFailureInInit("false", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.of("failInput", "true"));
testFailureInInit("false", app, AppWithMapReduce.ExplicitFaiiingMR.class, ImmutableMap.of("failOutput", "true"));
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceWithMultipleOutputsTest method testMultipleOutputs.
@Test
public void testMultipleOutputs() throws Exception {
ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class);
final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.PURCHASES);
Location inputFile = fileSet.getBaseLocation().append("inputFile");
inputFile.createNew();
PrintWriter writer = new PrintWriter(inputFile.getOutputStream());
// the PURCHASES dataset consists of purchase records in the format: <customerId> <spend>
writer.println("1 20");
writer.println("1 65");
writer.println("1 30");
writer.println("2 5");
writer.println("2 53");
writer.println("2 45");
writer.println("3 101");
writer.close();
// Using multiple outputs, this MapReduce send the records to a different path of the same dataset, depending
// on the value in the data (large spend amounts will go to one file, while small will go to another file.
runProgram(app, AppWithMapReduceUsingMultipleOutputs.SeparatePurchases.class, new BasicArguments());
FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.SEPARATED_PURCHASES);
Assert.assertEquals(ImmutableList.of("1 20", "1 30", "2 5", "2 45"), readFromOutput(outputFileSet, "small_purchases"));
Assert.assertEquals(ImmutableList.of("1 65", "2 53", "3 101"), readFromOutput(outputFileSet, "large_purchases"));
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceWithMultipleOutputsTest method testAddingMultipleOutputsWithSameAlias.
@Test
public void testAddingMultipleOutputsWithSameAlias() throws Exception {
final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class);
// will fail because it configured two outputs with the same alias
Assert.assertFalse(runProgram(app, AppWithMapReduceUsingMultipleOutputs.InvalidMapReduce.class, new BasicArguments()));
}
use of io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms in project cdap by caskdata.
the class MapReduceWithPartitionedTest method testPartitionedFileSetWithMR.
private void testPartitionedFileSetWithMR(boolean useCombineFileInputFormat) throws Exception {
ApplicationWithPrograms app = deployApp(AppWithPartitionedFileSet.class, new AppWithPartitionedFileSet.AppConfig(useCombineFileInputFormat));
// write a value to the input table
final Table table = datasetCache.getDataset(AppWithPartitionedFileSet.INPUT);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
table.put(Bytes.toBytes("x"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("1"));
}
});
// a partition key for the map/reduce output
final PartitionKey keyX = PartitionKey.builder().addStringField("type", "x").addLongField("time", 150000L).build();
// run the partition writer m/r with this output partition time
Map<String, String> runtimeArguments = Maps.newHashMap();
Map<String, String> outputArgs = Maps.newHashMap();
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
// this should have created a partition in the tpfs
final PartitionedFileSet dataset = datasetCache.getDataset(PARTITIONED);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Partition partition = dataset.getPartition(keyX);
Assert.assertNotNull(partition);
String path = partition.getRelativePath();
Assert.assertTrue(path.contains("x"));
Assert.assertTrue(path.contains("150000"));
}
});
// delete the data in the input table and write a new row
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) table).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
table.delete(Bytes.toBytes("x"));
table.put(Bytes.toBytes("y"), AppWithPartitionedFileSet.ONLY_COLUMN, Bytes.toBytes("2"));
}
});
// a new partition key for the next map/reduce
final PartitionKey keyY = PartitionKey.builder().addStringField("type", "y").addLongField("time", 200000L).build();
// now run the m/r again with a new partition time, say 5 minutes later
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionWriter.class, new BasicArguments(runtimeArguments)));
// this should have created a partition in the tpfs
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Partition partition = dataset.getPartition(keyY);
Assert.assertNotNull(partition);
String path = partition.getRelativePath();
Assert.assertNotNull(path);
Assert.assertTrue(path.contains("y"));
Assert.assertTrue(path.contains("200000"));
}
});
// a partition filter that matches the outputs of both map/reduces
PartitionFilter filterXY = PartitionFilter.builder().addRangeCondition("type", "x", "z").build();
// now run a map/reduce that reads all the partitions
runtimeArguments = Maps.newHashMap();
Map<String, String> inputArgs = Maps.newHashMap();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read both partitions - and written both x and y to row a
final Table output = datasetCache.getDataset(AppWithPartitionedFileSet.OUTPUT);
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("a"));
Assert.assertEquals("1", row.getString("x"));
Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
Assert.assertEquals("2", row.getString("y"));
Assert.assertEquals("{type=y, time=200000}", row.getString("y_key"));
}
});
// a partition filter that matches the output key of the first map/reduce
PartitionFilter filterX = PartitionFilter.builder().addValueCondition("type", "x").addRangeCondition("time", null, 160000L).build();
// now run a map/reduce that reads a range of the partitions, namely the first one
inputArgs.clear();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read the first partition only - and written only x to row b
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("b"));
Assert.assertEquals("1", row.getString("x"));
Assert.assertEquals("{type=x, time=150000}", row.getString("x_key"));
Assert.assertNull(row.get("y"));
Assert.assertNull(row.get("y_key"));
}
});
// a partition filter that matches no key
PartitionFilter filterMT = PartitionFilter.builder().addValueCondition("type", "nosuchthing").build();
// now run a map/reduce that reads an empty range of partitions (the filter matches nothing)
inputArgs.clear();
PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs));
runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");
Assert.assertTrue(runProgram(app, AppWithPartitionedFileSet.PartitionReader.class, new BasicArguments(runtimeArguments)));
// this should have read no partitions - and written nothing to row n
Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) output).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() {
Row row = output.get(Bytes.toBytes("n"));
Assert.assertTrue(row.isEmpty());
}
});
}
Aggregations