Use of java.io.DataOutputStream in project hadoop by apache.
Class TestChainMapReduce, method testChain.
@Test
public void testChain() throws Exception {
  Path inDir = new Path("testing/chain/input");
  Path outDir = new Path("testing/chain/output");
  // Hack for local FS that does not have the concept of a 'mounting point'
  if (isLocalFS()) {
    String localPathRoot = System.getProperty("test.build.data", "/tmp").replace(' ', '+');
    inDir = new Path(localPathRoot, inDir);
    outDir = new Path(localPathRoot, outDir);
  }
  JobConf conf = createJobConf();
  conf.setBoolean("localFS", isLocalFS());
  conf.setInt("mapreduce.job.maps", 1);
  cleanFlags(conf);
  FileSystem fs = FileSystem.get(conf);
  fs.delete(outDir, true);
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // Two input records; every element of the chain should see both.
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("1\n2\n");
  file.close();
  conf.setJobName("chain");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.set("a", "X");
  // Chain: AMap (private conf) -> BMap -> CReduce (private conf) -> DMap -> EMap (private conf).
  JobConf mapAConf = new JobConf(false);
  mapAConf.set("a", "A");
  ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, true, mapAConf);
  ChainMapper.addMapper(conf, BMap.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, false, null);
  JobConf reduceConf = new JobConf(false);
  reduceConf.set("a", "C");
  ChainReducer.setReducer(conf, CReduce.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, true, reduceConf);
  ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, false, null);
  JobConf mapEConf = new JobConf(false);
  mapEConf.set("a", "E");
  ChainReducer.addMapper(conf, EMap.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, true, mapEConf);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  // Each chain element must have been configured, fed both records, and closed.
  assertTrue(getFlag(conf, "configure.A"));
  assertTrue(getFlag(conf, "configure.B"));
  assertTrue(getFlag(conf, "configure.C"));
  assertTrue(getFlag(conf, "configure.D"));
  assertTrue(getFlag(conf, "configure.E"));
  assertTrue(getFlag(conf, "map.A.value.1"));
  assertTrue(getFlag(conf, "map.A.value.2"));
  assertTrue(getFlag(conf, "map.B.value.1"));
  assertTrue(getFlag(conf, "map.B.value.2"));
  assertTrue(getFlag(conf, "reduce.C.value.1"));
  assertTrue(getFlag(conf, "reduce.C.value.2"));
  assertTrue(getFlag(conf, "map.D.value.1"));
  assertTrue(getFlag(conf, "map.D.value.2"));
  assertTrue(getFlag(conf, "map.E.value.1"));
  assertTrue(getFlag(conf, "map.E.value.2"));
  assertTrue(getFlag(conf, "close.A"));
  assertTrue(getFlag(conf, "close.B"));
  assertTrue(getFlag(conf, "close.C"));
  assertTrue(getFlag(conf, "close.D"));
  assertTrue(getFlag(conf, "close.E"));
}
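The AMap through EMap and CReduce classes are not shown on this page. As a hedged sketch, AMap plausibly looks like the following, assuming a writeFlag helper that is the counterpart of the getFlag used in the assertions; the helper's name and signature are assumptions here, and this is an illustration, not the test's actual source.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Sketch only: each chain element records configure/map/close events under a
// name taken from its private JobConf ("a" = "A" for this element).
public class AMap extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  private JobConf conf;

  public void configure(JobConf conf) {
    this.conf = conf;
    try {
      writeFlag(conf, "configure." + conf.get("a"));  // asserted as "configure.A"
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public void map(LongWritable key, Text value,
      OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    writeFlag(conf, "map." + conf.get("a") + ".value." + value);  // "map.A.value.1" etc.
    output.collect(key, value);  // pass the record on to the next element in the chain
  }

  public void close() throws IOException {
    writeFlag(conf, "close." + conf.get("a"));  // asserted as "close.A"
  }
}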
Use of java.io.DataOutputStream in project hadoop by apache.
Class TestDelegatingInputFormat, method getPath.
static Path getPath(final String location, final FileSystem fs) throws IOException {
  Path path = new Path(location);
  // Create a multi-block file on HDFS: 4 KB buffer, replication 2, 512-byte blocks.
  DataOutputStream out = fs.create(path, true, 4096, (short) 2, 512, null);
  for (int i = 0; i < 1000; ++i) {
    out.writeChars("Hello\n");
  }
  out.close();
  return path;
}
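Since writeChars emits two bytes per character, the loop writes 1000 x 6 x 2 = 12,000 bytes against the 512-byte block size passed to create, so the file spans roughly 24 blocks. A hypothetical helper (not part of the original test) could confirm the multi-block layout:

// Sketch, assuming JUnit's assertTrue is in scope as in the rest of the test class.
static void assertMultiBlock(FileSystem fs, Path path) throws IOException {
  FileStatus stat = fs.getFileStatus(path);
  BlockLocation[] blocks = fs.getFileBlockLocations(stat, 0, stat.getLen());
  // 12,000 bytes / 512-byte blocks => ~24 blocks on HDFS.
  assertTrue("expected a multi-block file, got " + blocks.length + " block(s)",
      blocks.length > 1);
}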
Use of java.io.DataOutputStream in project hadoop by apache.
Class TestMultipleOutputs, method _testMOWithJavaSerialization.
protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
  Path inDir = getDir(IN_DIR);
  Path outDir = getDir(OUT_DIR);
  JobConf conf = createJobConf();
  FileSystem fs = FileSystem.get(conf);
  // Clear any leftovers from previous runs before writing the two input splits.
  fs.delete(inDir, true);
  fs.delete(outDir, true);
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  file = fs.create(new Path(inDir, "part-1"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  conf.setJobName("mo");
  // Java serialization first, so the Long/String map output types resolve to it.
  conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
      + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setMapOutputKeyClass(Long.class);
  conf.setMapOutputValueClass(String.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setOutputKeyClass(Long.class);
  conf.setOutputValueClass(String.class);
  conf.setOutputFormat(TextOutputFormat.class);
  MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
  MultipleOutputs.setCountersEnabled(conf, withCounters);
  conf.setMapperClass(MOJavaSerDeMap.class);
  conf.setReducerClass(MOJavaSerDeReduce.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  // Assert the number of named-output part files.
  int namedOutputCount = 0;
  FileStatus[] statuses = fs.listStatus(outDir);
  for (FileStatus status : statuses) {
    if (status.getPath().getName().equals("text-m-00000")
        || status.getPath().getName().equals("text-r-00000")) {
      namedOutputCount++;
    }
  }
  assertEquals(2, namedOutputCount);
  // Assert the correctness of the TextOutputFormat files.
  BufferedReader reader = new BufferedReader(new InputStreamReader(
      fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith("text"));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);
  Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
  if (!withCounters) {
    assertEquals(0, counters.size());
  } else {
    assertEquals(1, counters.size());
    assertEquals(2, counters.getCounter("text"));
  }
}
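MOJavaSerDeMap and MOJavaSerDeReduce are not shown on this page. A minimal sketch of what the reducer side could look like, given the Long/String Java-serialization types configured above; the record contents are illustrative assumptions, and only the MultipleOutputs calls mirror the configuration:

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class MOJavaSerDeReduce extends MapReduceBase
    implements Reducer<Long, String, Long, String> {
  private MultipleOutputs mos;

  public void configure(JobConf conf) {
    mos = new MultipleOutputs(conf);
  }

  @SuppressWarnings("unchecked")
  public void reduce(Long key, Iterator<String> values,
      OutputCollector<Long, String> output, Reporter reporter) throws IOException {
    while (values.hasNext()) {
      values.next();
      // Records sent to the "text" collector land in text-r-00000; the value
      // "text" is what would satisfy the line.endsWith("text") assertion above.
      mos.getCollector("text", reporter).collect(key, "text");
    }
  }

  public void close() throws IOException {
    // Closes the record writers opened for the named outputs.
    mos.close();
  }
}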
Use of java.io.DataOutputStream in project hadoop by apache.
Class TestMultipleOutputs, method _testMultipleOutputs.
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
  Path inDir = getDir(IN_DIR);
  Path outDir = getDir(OUT_DIR);
  JobConf conf = createJobConf();
  FileSystem fs = FileSystem.get(conf);
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  file = fs.create(new Path(inDir, "part-1"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  conf.setJobName("mo");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputFormat(TextOutputFormat.class);
  // "text" is a plain named output; "sequence" is a multi named output whose
  // per-record multiName is appended to the file name (sequence_A, sequence_B, ...).
  MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
  MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
  MultipleOutputs.setCountersEnabled(conf, withCounters);
  conf.setMapperClass(MOMap.class);
  conf.setReducerClass(MOReduce.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  // Assert the number of named-output part files.
  int namedOutputCount = 0;
  FileStatus[] statuses = fs.listStatus(outDir);
  for (FileStatus status : statuses) {
    String name = status.getPath().getName();
    if (name.equals("text-m-00000") || name.equals("text-m-00001")
        || name.equals("text-r-00000")
        || name.equals("sequence_A-m-00000") || name.equals("sequence_A-m-00001")
        || name.equals("sequence_B-m-00000") || name.equals("sequence_B-m-00001")
        || name.equals("sequence_B-r-00000") || name.equals("sequence_C-r-00000")) {
      namedOutputCount++;
    }
  }
  assertEquals(9, namedOutputCount);
  // Assert the correctness of the TextOutputFormat files.
  BufferedReader reader = new BufferedReader(new InputStreamReader(
      fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith("text"));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);
  // Assert the correctness of the SequenceFileOutputFormat files.
  SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
      new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
  assertEquals(LongWritable.class, seqReader.getKeyClass());
  assertEquals(Text.class, seqReader.getValueClass());
  count = 0;
  LongWritable key = new LongWritable();
  Text value = new Text();
  while (seqReader.next(key, value)) {
    assertEquals("sequence", value.toString());
    count++;
  }
  seqReader.close();
  assertFalse(count == 0);
  Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
  if (!withCounters) {
    assertEquals(0, counters.size());
  } else {
    assertEquals(4, counters.size());
    assertEquals(4, counters.getCounter("text"));
    assertEquals(2, counters.getCounter("sequence_A"));
    assertEquals(4, counters.getCounter("sequence_B"));
    assertEquals(2, counters.getCounter("sequence_C"));
  }
}
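For the multi named output, the old mapred MultipleOutputs API takes a per-record multiName, which is what produces the sequence_A-* and sequence_B-* file names counted above. A hedged sketch of the map side follows; MOMap's real source is not on this page, so the record choices are illustrative:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class MOMap extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  private MultipleOutputs mos;

  public void configure(JobConf conf) {
    mos = new MultipleOutputs(conf);
  }

  @SuppressWarnings("unchecked")
  public void map(LongWritable key, Text value,
      OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    output.collect(key, value);  // default job output
    // Plain named output: file names look like text-m-00000.
    mos.getCollector("text", reporter).collect(key, new Text("text"));
    // Multi named output: the multiName argument becomes part of the file
    // name, e.g. sequence_A-m-00000 and sequence_B-m-00000.
    mos.getCollector("sequence", "A", reporter).collect(key, new Text("sequence"));
    mos.getCollector("sequence", "B", reporter).collect(key, new Text("sequence"));
  }

  public void close() throws IOException {
    mos.close();
  }
}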
Use of java.io.DataOutputStream in project hadoop by apache.
Class TestPipes, method runNonPipedProgram.
/**
 * Run a map/reduce word count that does all of the map input and reduce
 * output directly rather than sending it back up to Java.
 * @param mr the mini MR cluster
 * @param dfs the DFS cluster
 * @param program the program to run
 * @param conf the job configuration to start from, or null for the cluster default
 * @throws IOException
 */
static void runNonPipedProgram(MiniMRCluster mr, MiniDFSCluster dfs,
    Path program, JobConf conf) throws IOException {
  JobConf job;
  if (conf == null) {
    job = mr.createJobConf();
  } else {
    job = new JobConf(conf);
  }
  job.setInputFormat(WordCountInputFormat.class);
  FileSystem local = FileSystem.getLocal(job);
  Path testDir = new Path("file:" + System.getProperty("test.build.data"), "pipes");
  Path inDir = new Path(testDir, "input");
  nonPipedOutDir = new Path(testDir, "output");
  Path wordExec = new Path("testing/bin/application");
  Path jobXml = new Path(testDir, "job.xml");
  {
    // Stage the pipes executable on the DFS cluster.
    FileSystem fs = dfs.getFileSystem();
    fs.delete(wordExec.getParent(), true);
    fs.copyFromLocalFile(program, wordExec);
  }
  DataOutputStream out = local.create(new Path(inDir, "part0"));
  out.writeBytes("i am a silly test\n");
  out.writeBytes("you are silly\n");
  out.writeBytes("i am a cat test\n");
  out.writeBytes("you is silly\n");
  out.writeBytes("i am a billy test\n");
  out.writeBytes("hello are silly\n");
  out.close();
  out = local.create(new Path(inDir, "part1"));
  out.writeBytes("mall world things drink java\n");
  out.writeBytes("hall silly cats drink java\n");
  out.writeBytes("all dogs bow wow\n");
  out.writeBytes("hello drink java\n");
  out.close();
  local.delete(nonPipedOutDir, true);
  local.mkdirs(nonPipedOutDir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  out = local.create(jobXml);
  job.writeXml(out);
  out.close();
  System.err.println("About to run: Submitter -conf " + jobXml + " -input " + inDir
      + " -output " + nonPipedOutDir + " -program "
      + dfs.getFileSystem().makeQualified(wordExec));
  try {
    int ret = ToolRunner.run(new Submitter(), new String[] {
        "-conf", jobXml.toString(),
        "-input", inDir.toString(),
        "-output", nonPipedOutDir.toString(),
        "-program", dfs.getFileSystem().makeQualified(wordExec).toString(),
        "-reduces", "2" });
    assertEquals(0, ret);
  } catch (Exception e) {
    fail("got exception: " + StringUtils.stringifyException(e));
  }
}
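Since the job is submitted with "-reduces 2", a follow-up check could confirm that both reducers wrote their output directly to the local output directory. This fragment is hypothetical, not part of the original method, and would be appended at the end of runNonPipedProgram where local and nonPipedOutDir are in scope:

// Sketch: list the local output directory; two reducer part files are expected.
FileStatus[] outputs = local.listStatus(nonPipedOutDir);
for (FileStatus status : outputs) {
  System.err.println("non-piped output: " + status.getPath().getName());
}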