Example 76 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestChainMapReduce method testChain.

@Test
public void testChain() throws Exception {
    Path inDir = new Path("testing/chain/input");
    Path outDir = new Path("testing/chain/output");
    // Hack for local FS that does not have the concept of a 'mounting point'
    if (isLocalFS()) {
        String localPathRoot = System.getProperty("test.build.data", "/tmp").replace(' ', '+');
        inDir = new Path(localPathRoot, inDir);
        outDir = new Path(localPathRoot, outDir);
    }
    JobConf conf = createJobConf();
    conf.setBoolean("localFS", isLocalFS());
    conf.setInt("mapreduce.job.maps", 1);
    cleanFlags(conf);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("1\n2\n");
    file.close();
    conf.setJobName("chain");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.set("a", "X");
    JobConf mapAConf = new JobConf(false);
    mapAConf.set("a", "A");
    // Map-phase chain: AMap then BMap run in sequence inside the single map task.
    // The boolean argument is byValue (pass keys/values by value between links);
    // the trailing JobConf carries per-link configuration (null means none).
    ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, mapAConf);
    ChainMapper.addMapper(conf, BMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, false, null);
    JobConf reduceConf = new JobConf(false);
    reduceConf.set("a", "C");
    // Reduce-phase chain: CReduce is the reducer proper; DMap and EMap run
    // after it, still inside the reduce task.
    ChainReducer.setReducer(conf, CReduce.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, reduceConf);
    ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, false, null);
    JobConf mapEConf = new JobConf(false);
    mapEConf.set("a", "E");
    ChainReducer.addMapper(conf, EMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, mapEConf);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    assertTrue(getFlag(conf, "configure.A"));
    assertTrue(getFlag(conf, "configure.B"));
    assertTrue(getFlag(conf, "configure.C"));
    assertTrue(getFlag(conf, "configure.D"));
    assertTrue(getFlag(conf, "configure.E"));
    assertTrue(getFlag(conf, "map.A.value.1"));
    assertTrue(getFlag(conf, "map.A.value.2"));
    assertTrue(getFlag(conf, "map.B.value.1"));
    assertTrue(getFlag(conf, "map.B.value.2"));
    assertTrue(getFlag(conf, "reduce.C.value.2"));
    assertTrue(getFlag(conf, "reduce.C.value.1"));
    assertTrue(getFlag(conf, "map.D.value.1"));
    assertTrue(getFlag(conf, "map.D.value.2"));
    assertTrue(getFlag(conf, "map.E.value.1"));
    assertTrue(getFlag(conf, "map.E.value.2"));
    assertTrue(getFlag(conf, "close.A"));
    assertTrue(getFlag(conf, "close.B"));
    assertTrue(getFlag(conf, "close.C"));
    assertTrue(getFlag(conf, "close.D"));
    assertTrue(getFlag(conf, "close.E"));
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf) JobClient(org.apache.hadoop.mapred.JobClient) Test(org.junit.Test)
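
All of these tests lean on the same detail: FileSystem#create returns an FSDataOutputStream, which extends java.io.DataOutputStream, so writeBytes can be called on it directly. A minimal standalone sketch of that input-writing pattern (the class name and path here are made up for illustration), using try-with-resources instead of the explicit close() the tests use:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.DataOutputStream;
import java.io.IOException;

public class WriteInputSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path input = new Path("testing/chain/input/part-0"); // hypothetical path
        // writeBytes emits only the low byte of each char, which is fine for
        // ASCII fixtures like "1\n2\n"; the stream closes even if a write fails.
        try (DataOutputStream out = fs.create(input)) {
            out.writeBytes("1\n2\n");
        }
    }
}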

Example 77 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestDelegatingInputFormat method getPath.

static Path getPath(final String location, final FileSystem fs) throws IOException {
    Path path = new Path(location);
    // create a multi-block file on hdfs: writeChars emits 2 bytes per char,
    // so 1000 * "Hello\n" is 12,000 bytes, i.e. 24 blocks at the 512-byte
    // block size requested here (overwrite, 4096-byte buffer, replication 2)
    DataOutputStream out = fs.create(path, true, 4096, (short) 2, 512, null);
    for (int i = 0; i < 1000; ++i) {
        out.writeChars("Hello\n");
    }
    out.close();
    return path;
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream)
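
writeChars writes UTF-16, two bytes per char, so the loop above produces 1000 * 6 * 2 = 12,000 bytes; at the requested 512-byte block size that is ceil(12000 / 512) = 24 blocks. A hedged sketch of how one could confirm the split (the class and helper name are ours, not from the test):

import java.io.IOException;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockCountSketch {
    // Returns how many blocks make up the file. On HDFS the file written
    // above should report 24; the local FS does not honor the block-size
    // hint the same way, so the count can differ there.
    static int blockCount(FileSystem fs, Path path) throws IOException {
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        return blocks.length;
    }
}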

Example 78 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestMultipleOutputs method _testMOWithJavaSerialization.

protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    // Deleting inDir here discards the part-0 file just written, so only the
    // part-1 file created next is left as job input; the job therefore runs a
    // single map task, matching the "text-m-00000"/"text-r-00000" assertions below.
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Long.class);
    conf.setMapOutputValueClass(String.class);
    conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
    conf.setOutputKeyClass(Long.class);
    conf.setOutputValueClass(String.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOJavaSerDeMap.class);
    conf.setReducerClass(MOJavaSerDeReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(2, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(1, counters.size());
        assertEquals(2, counters.getCounter("text"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) JobClient(org.apache.hadoop.mapred.JobClient) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) BufferedReader(java.io.BufferedReader) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf)
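
The mapper and reducer classes (MOJavaSerDeMap, MOJavaSerDeReduce) are not shown on this page. A hypothetical mapper in the same spirit, to illustrate how the old-API MultipleOutputs is typically driven; the class name, the "text" suffix logic, and the record routing are our assumptions, not the test's code:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class MOSketchMap extends MapReduceBase
        implements Mapper<LongWritable, Text, Long, String> {

    private MultipleOutputs mos;

    @Override
    public void configure(JobConf conf) {
        // MultipleOutputs reads the named-output registrations from the JobConf
        mos = new MultipleOutputs(conf);
    }

    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Long, String> output, Reporter reporter)
            throws IOException {
        // Plain Long/String pairs, which is why the job config above lists
        // JavaSerialization alongside WritableSerialization.
        output.collect(key.get(), value.toString());
        // Route a copy to the "text" named output registered via addNamedOutput;
        // appending "text" mirrors the line.endsWith("text") assertion.
        mos.getCollector("text", reporter).collect(key.get(), value.toString() + "text");
    }

    @Override
    public void close() throws IOException {
        mos.close(); // flushes and closes all named outputs
    }
}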

Example 79 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestMultipleOutputs method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOMap.class);
    conf.setReducerClass(MOReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-m-00001") || status.getPath().getName().equals("text-r-00000") || status.getPath().getName().equals("sequence_A-m-00000") || status.getPath().getName().equals("sequence_A-m-00001") || status.getPath().getName().equals("sequence_B-m-00000") || status.getPath().getName().equals("sequence_B-m-00001") || status.getPath().getName().equals("sequence_B-r-00000") || status.getPath().getName().equals("sequence_C-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
    assertEquals(LongWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals("sequence", value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(4, counters.size());
        assertEquals(4, counters.getCounter("text"));
        assertEquals(2, counters.getCounter("sequence_A"));
        assertEquals(4, counters.getCounter("sequence_B"));
        assertEquals(2, counters.getCounter("sequence_C"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) BufferedReader(java.io.BufferedReader) Text(org.apache.hadoop.io.Text) JobClient(org.apache.hadoop.mapred.JobClient) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)
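
The SequenceFile.Reader(fs, path, conf) constructor used above is deprecated in current Hadoop releases. A sketch of the same scan with the options-based constructor available since Hadoop 2.x (the class and method names here are ours):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SeqScanSketch {
    static int countValues(Configuration conf, Path path) throws IOException {
        int count = 0;
        // Reader.file(path) replaces the deprecated (fs, path, conf) form;
        // the Reader is Closeable, so try-with-resources handles cleanup.
        try (SequenceFile.Reader reader =
                 new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            LongWritable key = new LongWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                count++;
            }
        }
        return count;
    }
}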

Example 80 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestPipes method runNonPipedProgram.

/**
   * Run a map/reduce word count that does all of the map input and reduce
   * output directly rather than sending it back up to Java.
   * @param mr The mini mr cluster
   * @param dfs the dfs cluster
   * @param program the program to run
   * @throws IOException
   */
static void runNonPipedProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, JobConf conf) throws IOException {
    JobConf job;
    if (conf == null) {
        job = mr.createJobConf();
    } else {
        job = new JobConf(conf);
    }
    job.setInputFormat(WordCountInputFormat.class);
    FileSystem local = FileSystem.getLocal(job);
    Path testDir = new Path("file:" + System.getProperty("test.build.data"), "pipes");
    Path inDir = new Path(testDir, "input");
    nonPipedOutDir = new Path(testDir, "output");
    Path wordExec = new Path("testing/bin/application");
    Path jobXml = new Path(testDir, "job.xml");
    {
        FileSystem fs = dfs.getFileSystem();
        fs.delete(wordExec.getParent(), true);
        fs.copyFromLocalFile(program, wordExec);
    }
    DataOutputStream out = local.create(new Path(inDir, "part0"));
    out.writeBytes("i am a silly test\n");
    out.writeBytes("you are silly\n");
    out.writeBytes("i am a cat test\n");
    out.writeBytes("you is silly\n");
    out.writeBytes("i am a billy test\n");
    out.writeBytes("hello are silly\n");
    out.close();
    out = local.create(new Path(inDir, "part1"));
    out.writeBytes("mall world things drink java\n");
    out.writeBytes("hall silly cats drink java\n");
    out.writeBytes("all dogs bow wow\n");
    out.writeBytes("hello drink java\n");
    out.close();
    local.delete(nonPipedOutDir, true);
    local.mkdirs(nonPipedOutDir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    out = local.create(jobXml);
    job.writeXml(out);
    out.close();
    System.err.println("About to run: Submitter -conf " + jobXml + " -input " + inDir + " -output " + nonPipedOutDir + " -program " + dfs.getFileSystem().makeQualified(wordExec));
    try {
        int ret = ToolRunner.run(new Submitter(), new String[] { "-conf", jobXml.toString(), "-input", inDir.toString(), "-output", nonPipedOutDir.toString(), "-program", dfs.getFileSystem().makeQualified(wordExec).toString(), "-reduces", "2" });
        assertEquals(0, ret);
    } catch (Exception e) {
        assertTrue("got exception: " + StringUtils.stringifyException(e), false);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) JobConf(org.apache.hadoop.mapred.JobConf) IOException(java.io.IOException)
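
A closing aside on the three string-writing methods that appear across these examples. The byte counts below follow directly from the java.io.DataOutputStream contract: writeBytes keeps only the low byte of each char, writeChars writes both bytes, and writeUTF prepends a two-byte length to a modified-UTF-8 encoding. A self-contained check (class name is ours):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class EncodingSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeBytes("Hello\n");   // 6 bytes: low byte of each char
        int afterBytes = bytes.size();
        out.writeChars("Hello\n");   // 12 bytes: 2 bytes per char (UTF-16)
        int afterChars = bytes.size();
        out.writeUTF("Hello\n");     // 8 bytes: 2-byte length + 6 UTF-8 bytes
        int afterUTF = bytes.size();
        System.out.println(afterBytes);              // 6
        System.out.println(afterChars - afterBytes); // 12
        System.out.println(afterUTF - afterChars);   // 8
    }
}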

Aggregations

DataOutputStream (java.io.DataOutputStream): 2968
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1314
IOException (java.io.IOException): 1024
Test (org.junit.Test): 633
DataInputStream (java.io.DataInputStream): 615
FileOutputStream (java.io.FileOutputStream): 427
ByteArrayInputStream (java.io.ByteArrayInputStream): 411
File (java.io.File): 281
BufferedOutputStream (java.io.BufferedOutputStream): 228
UnitTest (org.apache.geode.test.junit.categories.UnitTest): 172
URL (java.net.URL): 149
InputStreamReader (java.io.InputStreamReader): 146
BufferedReader (java.io.BufferedReader): 142
Path (org.apache.hadoop.fs.Path): 137
DataInput (java.io.DataInput): 124
ArrayList (java.util.ArrayList): 122
HttpURLConnection (java.net.HttpURLConnection): 120
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 117
FileInputStream (java.io.FileInputStream): 107
InputStream (java.io.InputStream): 107