
Example 51 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestJobSysDirWithDFS method launchWordCount.

public static TestResult launchWordCount(JobConf conf, Path inDir, Path outDir, String input, int numMaps, int numReduces, String sysDir) throws IOException {
    FileSystem inFs = inDir.getFileSystem(conf);
    FileSystem outFs = outDir.getFileSystem(conf);
    outFs.delete(outDir, true);
    if (!inFs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    {
        DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
        file.writeBytes(input);
        file.close();
    }
    conf.setJobName("wordcount");
    conf.setInputFormat(TextInputFormat.class);
    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(WordCount.MapClass.class);
    conf.setCombinerClass(WordCount.Reduce.class);
    conf.setReducerClass(WordCount.Reduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReduces);
    conf.set(JTConfig.JT_SYSTEM_DIR, "/tmp/subru/mapred/system");
    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.runJob(conf);
    // Checking that the Job Client system dir is not used
    assertFalse(FileSystem.get(conf).exists(new Path(conf.get(JTConfig.JT_SYSTEM_DIR))));
    // Check if the Job Tracker system dir is propagated to the client
    assertFalse(sysDir.contains("/tmp/subru/mapred/system"));
    assertTrue(sysDir.contains("custom"));
    return new TestResult(job, MapReduceTestUtil.readOutput(outDir, conf));
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException)
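The input-writing block above follows the common pattern of getting a DataOutputStream from FileSystem.create and closing it after writeBytes. A minimal sketch of that step with try-with-resources (the fs, inDir, and input names stand in for the example's variables, assuming the usual java.io and org.apache.hadoop.fs imports):

static void writeInputFile(FileSystem fs, Path inDir, String input) throws IOException {
    // the FSDataOutputStream returned by create() extends java.io.DataOutputStream,
    // and try-with-resources closes it even if writeBytes throws
    try (DataOutputStream file = fs.create(new Path(inDir, "part-0"))) {
        file.writeBytes(input);
    }
}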

Example 52 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestGridMixClasses method testLoadSplit.

/*
   * simple test of LoadSplit (getters, copy, write, read, ...)
   */
@Test(timeout = 1000)
public void testLoadSplit() throws Exception {
    LoadSplit test = getLoadSplit();
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(data);
    test.write(out);
    LoadSplit copy = new LoadSplit();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(data.toByteArray())));
    // data should be the same
    assertEquals(test.getId(), copy.getId());
    assertEquals(test.getMapCount(), copy.getMapCount());
    assertEquals(test.getInputRecords(), copy.getInputRecords());
    assertEquals(test.getOutputBytes()[0], copy.getOutputBytes()[0]);
    assertEquals(test.getOutputRecords()[0], copy.getOutputRecords()[0]);
    assertEquals(test.getReduceBytes(0), copy.getReduceBytes(0));
    assertEquals(test.getReduceRecords(0), copy.getReduceRecords(0));
    assertEquals(test.getMapResourceUsageMetrics().getCumulativeCpuUsage(), copy.getMapResourceUsageMetrics().getCumulativeCpuUsage());
    assertEquals(test.getReduceResourceUsageMetrics(0).getCumulativeCpuUsage(), copy.getReduceResourceUsageMetrics(0).getCumulativeCpuUsage());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)
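The test exercises the standard Writable round trip: write(DataOutput) serializes the object into a byte array and readFields(DataInput) rebuilds it from the same bytes. A minimal sketch for any Writable (the roundTrip helper name is illustrative, assuming java.io and org.apache.hadoop.io.Writable imports):

static void roundTrip(Writable original, Writable copy) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(bytes)) {
        // serialize the source object
        original.write(out);
    }
    try (DataInputStream in =
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
        // populate the empty instance from the serialized bytes
        copy.readFields(in);
    }
}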

Example 53 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestGridMixClasses method testLoadJobLoadSortComparator.

/*
   * test LoadSortComparator
   */
@Test(timeout = 3000)
public void testLoadJobLoadSortComparator() throws Exception {
    LoadJob.LoadSortComparator test = new LoadJob.LoadSortComparator();
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(data);
    WritableUtils.writeVInt(dos, 2);
    WritableUtils.writeVInt(dos, 1);
    WritableUtils.writeVInt(dos, 4);
    WritableUtils.writeVInt(dos, 7);
    WritableUtils.writeVInt(dos, 4);
    byte[] b1 = data.toByteArray();
    byte[] b2 = data.toByteArray();
    // the same data should compare as equal
    assertEquals(0, test.compare(b1, 0, 1, b2, 0, 1));
    b2[2] = 5;
    // compared like a GridmixKey: the differing byte gives -1 (4 - 5)
    assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1));
    b2[2] = 2;
    // compared like a GridmixKey: the differing byte gives 2 (4 - 2)
    assertEquals(2, test.compare(b1, 0, 1, b2, 0, 1));
    // compare arrays by first byte with b2 offset by one: 2 - 1 = 1, since the bytes at index 2 are equal again (4 == 4)
    b2[2] = 4;
    assertEquals(1, test.compare(b1, 0, 1, b2, 1, 1));
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)
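The indices in this test work out because WritableUtils.writeVInt stores values in the range -112..127 as a single byte, so the five VInts above occupy bytes 0..4 and b2[2] is the third value (4). A short round-trip sketch of that encoding, assuming the standard WritableUtils API:

ByteArrayOutputStream data = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(data);
// small values are encoded in a single byte each
WritableUtils.writeVInt(dos, 4);
byte[] raw = data.toByteArray();   // raw.length == 1, raw[0] == 4
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(raw));
int value = WritableUtils.readVInt(dis);   // value == 4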

Example 54 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestGridMixClasses method testGridmixJobSpecGroupingComparator.

/*
   * test SpecGroupingComparator
   */
@Test(timeout = 3000)
public void testGridmixJobSpecGroupingComparator() throws Exception {
    GridmixJob.SpecGroupingComparator test = new GridmixJob.SpecGroupingComparator();
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(data);
    WritableUtils.writeVInt(dos, 2);
    WritableUtils.writeVInt(dos, 1);
    // 0: REDUCE SPEC
    WritableUtils.writeVInt(dos, 0);
    WritableUtils.writeVInt(dos, 7);
    WritableUtils.writeVInt(dos, 4);
    byte[] b1 = data.toByteArray();
    byte[] b2 = data.toByteArray();
    // identical data should compare as equal
    assertEquals(0, test.compare(b1, 0, 1, b2, 0, 1));
    b2[2] = 1;
    // for Reduce
    assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1));
    // by Reduce spec
    // 1: DATA SPEC
    b2[2] = 1;
    assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1));
    // comparing equal GridmixKey objects should give 0
    assertEquals(0, test.compare(new GridmixKey(GridmixKey.DATA, 100, 2), new GridmixKey(GridmixKey.DATA, 100, 2)));
    // REDUCE SPEC
    assertEquals(-1, test.compare(new GridmixKey(GridmixKey.REDUCE_SPEC, 100, 2), new GridmixKey(GridmixKey.DATA, 100, 2)));
    assertEquals(1, test.compare(new GridmixKey(GridmixKey.DATA, 100, 2), new GridmixKey(GridmixKey.REDUCE_SPEC, 100, 2)));
    // only DATA
    assertEquals(2, test.compare(new GridmixKey(GridmixKey.DATA, 102, 2), new GridmixKey(GridmixKey.DATA, 100, 2)));
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)
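Both comparator tests call the raw-bytes form of RawComparator.compare(byte[], int, int, byte[], int, int). As an illustration of that contract only (not the GridMix implementation, which additionally orders REDUCE_SPEC records ahead of DATA records), a comparator that decides on a single leading byte might look like this, assuming org.apache.hadoop.io imports:

static class LeadingByteComparator implements RawComparator<BytesWritable> {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // unsigned difference of the first byte at each offset
        return (b1[s1] & 0xff) - (b2[s2] & 0xff);
    }

    @Override
    public int compare(BytesWritable a, BytesWritable b) {
        return a.compareTo(b);
    }
}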

Example 55 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class StreamBackedIterator method clear.

public void clear() {
    if (null != inbuf)
        inbuf.resetStream();
    outbuf.reset();
    outfbuf = new DataOutputStream(outbuf);
}
Also used : DataOutputStream(java.io.DataOutputStream)
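This clear() works because ByteArrayOutputStream.reset() discards the buffered bytes in place, while wrapping a fresh DataOutputStream also resets the written-byte counter that the old wrapper would still carry. A standalone sketch of that reset pattern, using local java.io buffers rather than the iterator's fields:

ByteArrayOutputStream outbuf = new ByteArrayOutputStream();
DataOutputStream outfbuf = new DataOutputStream(outbuf);
outfbuf.writeInt(42);                       // outfbuf.size() == 4
outbuf.reset();                             // drop the buffered bytes, keep the backing array
outfbuf = new DataOutputStream(outbuf);     // new wrapper so size() starts at 0 again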

Aggregations

DataOutputStream (java.io.DataOutputStream): 2968
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1314
IOException (java.io.IOException): 1024
Test (org.junit.Test): 633
DataInputStream (java.io.DataInputStream): 615
FileOutputStream (java.io.FileOutputStream): 427
ByteArrayInputStream (java.io.ByteArrayInputStream): 411
File (java.io.File): 281
BufferedOutputStream (java.io.BufferedOutputStream): 228
UnitTest (org.apache.geode.test.junit.categories.UnitTest): 172
URL (java.net.URL): 149
InputStreamReader (java.io.InputStreamReader): 146
BufferedReader (java.io.BufferedReader): 142
Path (org.apache.hadoop.fs.Path): 137
DataInput (java.io.DataInput): 124
ArrayList (java.util.ArrayList): 122
HttpURLConnection (java.net.HttpURLConnection): 120
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 117
FileInputStream (java.io.FileInputStream): 107
InputStream (java.io.InputStream): 107