Example 46 with DataOutputStream

Use of java.io.DataOutputStream in project hadoop by apache.

Class TestFetcher, method testCopyFromHostOnAnyException.

@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testCopyFromHostOnAnyException() throws Exception {
    InMemoryMapOutput<Text, Text> immo = mock(InMemoryMapOutput.class);
    Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm, r, metrics, except, key, connection);
    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
    when(connection.getResponseCode()).thenReturn(200);
    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    header.write(new DataOutputStream(bout));
    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
    when(connection.getInputStream()).thenReturn(in);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt())).thenReturn(immo);
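    // Stub the in-memory shuffle to throw, simulating an arbitrary failure while copying the map output.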
    doThrow(new ArrayIndexOutOfBoundsException()).when(immo).shuffle(any(MapHost.class), any(InputStream.class), anyLong(), anyLong(), any(ShuffleClientMetrics.class), any(Reporter.class));
    underTest.copyFromHost(host);
    verify(connection).addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash);
    verify(ss, times(1)).copyFailed(map1ID, host, true, false);
}
Also used: DataOutputStream (java.io.DataOutputStream), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), IFileInputStream (org.apache.hadoop.mapred.IFileInputStream), FilterInputStream (java.io.FilterInputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), InputStream (java.io.InputStream), Reporter (org.apache.hadoop.mapred.Reporter), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Test (org.junit.Test)
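
The test above serializes a ShuffleHeader into a byte array with DataOutputStream so the mocked connection can replay it as its input stream. A minimal standalone sketch of that write-then-read-back pattern, using only java.io (the class name and header fields here are illustrative, not the Hadoop test's actual ShuffleHeader format):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class HeaderRoundTrip {
    public static void main(String[] args) throws IOException {
        // Serialize a small "header" (an id string plus two lengths) into memory.
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bout);
        dos.writeUTF("attempt_0_0000_m_000001_0"); // stand-in for a map id
        dos.writeLong(10L); // compressed length
        dos.writeLong(10L); // raw length
        dos.flush();

        // Read it back, as the fetcher reads the header from the connection's stream.
        DataInputStream din = new DataInputStream(new ByteArrayInputStream(bout.toByteArray()));
        System.out.println(din.readUTF() + " " + din.readLong() + "/" + din.readLong());
    }
}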

Example 47 with DataOutputStream

Use of java.io.DataOutputStream in project hadoop by apache.

Class TestFetcher, method testCopyFromHostWait.

@Test
public void testCopyFromHostWait() throws Exception {
    Fetcher<Text, Text> underTest = new FakeFetcher<Text, Text>(job, id, ss, mm, r, metrics, except, key, connection);
    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
    when(connection.getResponseCode()).thenReturn(200);
    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    header.write(new DataOutputStream(bout));
    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
    when(connection.getInputStream()).thenReturn(in);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)).thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
    // Defaults to null, which is what we want to test.
    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt())).thenReturn(null);
    underTest.copyFromHost(host);
    verify(connection).addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash);
    verify(allErrs, never()).increment(1);
    verify(ss, never()).copyFailed(map1ID, host, true, false);
    verify(ss, never()).copyFailed(map2ID, host, true, false);
    verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
    verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), DataOutputStream (java.io.DataOutputStream), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Test (org.junit.Test)

Example 48 with DataOutputStream

Use of java.io.DataOutputStream in project hadoop by apache.

Class TestFetcher, method testCorruptedIFile.

@Test
public void testCorruptedIFile() throws Exception {
    final int fetcher = 7;
    Path onDiskMapOutputPath = new Path(name.getMethodName() + "/foo");
    Path shuffledToDisk = OnDiskMapOutput.getTempPath(onDiskMapOutputPath, fetcher);
    fs = FileSystem.getLocal(job).getRaw();
    IFileWrappedMapOutput<Text, Text> odmo = new OnDiskMapOutput<Text, Text>(map1ID, mm, 100L, job, fetcher, true, fs, onDiskMapOutputPath);
    String mapData = "MAPDATA12345678901234567890";
    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1);
    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(bout);
    IFileOutputStream ios = new IFileOutputStream(dos);
    header.write(dos);
    int headerSize = dos.size();
    try {
        ios.write(mapData.getBytes());
    } finally {
        ios.close();
    }
    int dataSize = bout.size() - headerSize;
    // Ensure that the OnDiskMapOutput shuffler can successfully read the data.
    MapHost host = new MapHost("TestHost", "http://test/url");
    ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
    try {
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
    } finally {
        bin.close();
    }
    // Now corrupt the IFile data.
    byte[] corrupted = bout.toByteArray();
    corrupted[headerSize + (dataSize / 2)] = 0x0;
    try {
        bin = new ByteArrayInputStream(corrupted);
        // Read past the shuffle header.
        bin.read(new byte[headerSize], 0, headerSize);
        odmo.shuffle(host, bin, dataSize, dataSize, metrics, Reporter.NULL);
        fail("OnDiskMapOutput.shuffle didn't detect the corrupted map partition file");
    } catch (ChecksumException e) {
        LOG.info("The expected checksum exception was thrown.", e);
    } finally {
        bin.close();
    }
    // Ensure that the shuffled file can be read.
    IFileInputStream iFin = new IFileInputStream(fs.open(shuffledToDisk), dataSize, job);
    try {
        iFin.read(new byte[dataSize], 0, dataSize);
    } finally {
        iFin.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DataOutputStream (java.io.DataOutputStream), ChecksumException (org.apache.hadoop.fs.ChecksumException), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), IFileOutputStream (org.apache.hadoop.mapred.IFileOutputStream), IFileInputStream (org.apache.hadoop.mapred.IFileInputStream), Test (org.junit.Test)
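
The corruption in this test is caught because IFileOutputStream writes a checksum after the payload, which the reading side verifies and reports as a ChecksumException. A rough standalone analogue of that trailing-checksum check, sketched with java.util.zip.CRC32 rather than Hadoop's actual IFile format (the class name and byte layout here are assumptions for illustration):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public class TrailingChecksum {
    public static void main(String[] args) throws IOException {
        byte[] data = "MAPDATA12345678901234567890".getBytes(StandardCharsets.UTF_8);

        // Write the payload followed by its CRC32, mimicking a trailing checksum.
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bout);
        dos.write(data);
        CRC32 crc = new CRC32();
        crc.update(data);
        dos.writeLong(crc.getValue());
        dos.flush();

        // Flip one payload byte, as the test does.
        byte[] stored = bout.toByteArray();
        stored[data.length / 2] = 0x0;

        // Re-read and verify; a mismatch here plays the role of ChecksumException.
        DataInputStream din = new DataInputStream(new ByteArrayInputStream(stored));
        byte[] payload = new byte[data.length];
        din.readFully(payload);
        CRC32 check = new CRC32();
        check.update(payload);
        System.out.println(check.getValue() == din.readLong() ? "ok" : "corruption detected");
    }
}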

Example 49 with DataOutputStream

Use of java.io.DataOutputStream in project hadoop by apache.

Class TestMapRed, method checkCompression.

private void checkCompression(boolean compressMapOutputs, CompressionType redCompression, boolean includeCombine) throws Exception {
    JobConf conf = new JobConf(TestMapRed.class);
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.get(conf);
    fs.delete(testdir, true);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setMapperClass(MyMap.class);
    conf.setReducerClass(MyReduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
    if (includeCombine) {
        conf.setCombinerClass(IdentityReducer.class);
    }
    conf.setCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.setOutputCompressionType(conf, redCompression);
    try {
        if (!fs.mkdirs(testdir)) {
            throw new IOException("Mkdirs failed to create " + testdir.toString());
        }
        if (!fs.mkdirs(inDir)) {
            throw new IOException("Mkdirs failed to create " + inDir.toString());
        }
        Path inFile = new Path(inDir, "part0");
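        // FileSystem.create returns an FSDataOutputStream, which extends java.io.DataOutputStream.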
        DataOutputStream f = fs.create(inFile);
        f.writeBytes("Owen was here\n");
        f.writeBytes("Hadoop is fun\n");
        f.writeBytes("Is this done, yet?\n");
        f.close();
        RunningJob rj = JobClient.runJob(conf);
        assertTrue("job was complete", rj.isComplete());
        assertTrue("job was successful", rj.isSuccessful());
        Path output = new Path(outDir, Task.getOutputName(0));
        assertTrue("reduce output exists " + output, fs.exists(output));
        SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
        assertEquals("is reduce output compressed " + output, redCompression != CompressionType.NONE, rdr.isCompressed());
        rdr.close();
    } finally {
        fs.delete(testdir, true);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), DataOutputStream (java.io.DataOutputStream), FileSystem (org.apache.hadoop.fs.FileSystem), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), IOException (java.io.IOException)
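
A caveat worth noting on the input-writing step: DataOutputStream.writeBytes writes only the low-order byte of each char, which is safe for the ASCII test input above but silently mangles other text; writeUTF is the encoding-safe alternative for single strings. A small illustrative sketch of the difference (the class name is not from the test):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WriteBytesVsWriteUTF {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bout);

        // writeBytes: one byte per char, the high byte of each char is discarded.
        dos.writeBytes("Owen was here\n"); // 14 bytes
        // writeUTF: 2-byte length prefix plus modified UTF-8 payload, safe for any string.
        dos.writeUTF("Owen was here"); // 2 + 13 = 15 bytes
        dos.flush();

        System.out.println(bout.size()); // prints 29
    }
}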

Example 50 with DataOutputStream

Use of java.io.DataOutputStream in project hadoop by apache.

Class TestJobCleanup, method setUp.

@BeforeClass
public static void setUp() throws IOException {
    JobConf conf = new JobConf();
    fileSys = FileSystem.get(conf);
    fileSys.delete(new Path(TEST_ROOT_DIR), true);
    conf.set("mapred.job.tracker.handler.count", "1");
    conf.set("mapred.job.tracker", "127.0.0.1:0");
    conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
    conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
    conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR + "/intermediate");
    conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");
    mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
    inDir = new Path(TEST_ROOT_DIR, "test-input");
    String input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
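    // Write a small input file for the test jobs; create() returns a DataOutputStream subclass (FSDataOutputStream).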
    DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
    file.writeBytes(input);
    file.close();
    emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
    fileSys.mkdirs(emptyInDir);
}
Also used: Path (org.apache.hadoop.fs.Path), DataOutputStream (java.io.DataOutputStream), BeforeClass (org.junit.BeforeClass)

Aggregations

DataOutputStream (java.io.DataOutputStream): 2968
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1314
IOException (java.io.IOException): 1024
Test (org.junit.Test): 633
DataInputStream (java.io.DataInputStream): 615
FileOutputStream (java.io.FileOutputStream): 427
ByteArrayInputStream (java.io.ByteArrayInputStream): 411
File (java.io.File): 281
BufferedOutputStream (java.io.BufferedOutputStream): 228
UnitTest (org.apache.geode.test.junit.categories.UnitTest): 172
URL (java.net.URL): 149
InputStreamReader (java.io.InputStreamReader): 146
BufferedReader (java.io.BufferedReader): 142
Path (org.apache.hadoop.fs.Path): 137
DataInput (java.io.DataInput): 124
ArrayList (java.util.ArrayList): 122
HttpURLConnection (java.net.HttpURLConnection): 120
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 117
FileInputStream (java.io.FileInputStream): 107
InputStream (java.io.InputStream): 107