Example 1 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestFileConcurrentReader, method tailFile:

private long tailFile(Path file, long startPos) throws IOException {
    long numRead = 0;
    // Read from startPos to the current EOF, checking that the data matches the
    // expected sequential byte pattern; try-with-resources ensures the stream is
    // closed even when validation fails.
    try (FSDataInputStream inputStream = fileSystem.open(file)) {
        inputStream.seek(startPos);
        int len = 4 * 1024;
        byte[] buf = new byte[len];
        int read;
        while ((read = inputStream.read(buf)) > -1) {
            LOG.info(String.format("read %d bytes", read));
            if (!validateSequentialBytes(buf, (int) (startPos + numRead), read)) {
                LOG.error(String.format("invalid bytes: [%s]\n", Arrays.toString(buf)));
                // The second constructor argument records the offset at which validation failed
                throw new ChecksumException("unable to validate bytes", startPos);
            }
            numRead += read;
        }
    }
    return numRead + startPos - 1;
}
Also used: ChecksumException (org.apache.hadoop.fs.ChecksumException), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)
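
For quick reference, a minimal, self-contained sketch (not taken from the Hadoop source) of the two-argument constructor used above and the getPos() accessor it feeds; it assumes only hadoop-common on the classpath, and the class name and offset are illustrative:

import org.apache.hadoop.fs.ChecksumException;

public class ChecksumExceptionDemo {
    public static void main(String[] args) {
        try {
            // The second constructor argument records the stream offset of the failure
            throw new ChecksumException("unable to validate bytes", 4096L);
        } catch (ChecksumException ce) {
            // getPos() recovers that offset for logging or retry decisions
            System.err.println("checksum failure at offset " + ce.getPos() + ": " + ce.getMessage());
        }
    }
}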

Example 2 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestFileConcurrentReader, method runTestUnfinishedBlockCRCError:

private void runTestUnfinishedBlockCRCError(final boolean transferToAllowed, final SyncType syncType, final int writeSize, Configuration conf) throws IOException {
    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_TRANSFERTO_ALLOWED_KEY, transferToAllowed);
    init(conf);
    final Path file = new Path("/block-being-written-to");
    final int numWrites = 2000;
    final AtomicBoolean writerDone = new AtomicBoolean(false);
    final AtomicBoolean writerStarted = new AtomicBoolean(false);
    final AtomicBoolean error = new AtomicBoolean(false);
    // Writer thread: appends sequential data, hflush()ing after each write when SYNC
    final Thread writer = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                FSDataOutputStream outputStream = fileSystem.create(file);
                if (syncType == SyncType.APPEND) {
                    outputStream.close();
                    outputStream = fileSystem.append(file);
                }
                try {
                    for (int i = 0; !error.get() && i < numWrites; i++) {
                        final byte[] writeBuf = DFSTestUtil.generateSequentialBytes(i * writeSize, writeSize);
                        outputStream.write(writeBuf);
                        if (syncType == SyncType.SYNC) {
                            outputStream.hflush();
                        }
                        writerStarted.set(true);
                    }
                } catch (IOException e) {
                    error.set(true);
                    LOG.error("error writing to file", e);
                } finally {
                    outputStream.close();
                }
                writerDone.set(true);
            } catch (Exception e) {
                LOG.error("error in writer", e);
                throw new RuntimeException(e);
            }
        }
    });
    // Tailer thread: busy-polls the file, re-validating from the last good offset
    Thread tailer = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                long startPos = 0;
                while (!writerDone.get() && !error.get()) {
                    if (writerStarted.get()) {
                        try {
                            startPos = tailFile(file, startPos);
                        } catch (IOException e) {
                            LOG.error(String.format("error tailing file %s", file), e);
                            throw new RuntimeException(e);
                        }
                    }
                }
            } catch (RuntimeException e) {
                if (e.getCause() instanceof ChecksumException) {
                    error.set(true);
                }
                writer.interrupt();
                LOG.error("error in tailer", e);
                throw e;
            }
        }
    });
    writer.start();
    tailer.start();
    try {
        writer.join();
        tailer.join();
        assertFalse("error occurred, see log above", error.get());
    } catch (InterruptedException e) {
        LOG.info("interrupted waiting for writer or tailer to complete");
        Thread.currentThread().interrupt();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ChecksumException (org.apache.hadoop.fs.ChecksumException), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), IOException (java.io.IOException)
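
The coordination above relies on shared AtomicBoolean flags rather than locks: error stops both threads, writerDone stops the tailer, and writerStarted keeps the tailer from reading before any data exists. A stripped-down, HDFS-free sketch of the same pattern (all names illustrative):

import java.util.concurrent.atomic.AtomicBoolean;

public class WriterTailerDemo {
    public static void main(String[] args) throws InterruptedException {
        final AtomicBoolean started = new AtomicBoolean(false);
        final AtomicBoolean done = new AtomicBoolean(false);
        final AtomicBoolean error = new AtomicBoolean(false);

        Thread writer = new Thread(() -> {
            for (int i = 0; !error.get() && i < 1000; i++) {
                // produce data here; set error on failure so the tailer stops too
                started.set(true);
            }
            done.set(true); // signal the tailer that no more data is coming
        });

        Thread tailer = new Thread(() -> {
            while (!done.get() && !error.get()) {
                if (started.get()) {
                    // consume data here; busy-polls, exactly as the test does
                }
            }
        });

        writer.start();
        tailer.start();
        writer.join();
        tailer.join();
    }
}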

Example 3 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestFsck, method testCorruptBlock:

@Test
public void testCorruptBlock() throws Exception {
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
    // Set short retry timeouts so this test runs faster
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
    FileSystem fs = null;
    DFSClient dfsClient = null;
    LocatedBlocks blocks = null;
    int replicaCount = 0;
    Random random = new Random();
    String outStr = null;
    short factor = 1;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    Path file1 = new Path("/testCorruptBlock");
    DFSTestUtil.createFile(fs, file1, 1024, factor, 0);
    // Wait until file replication has completed
    DFSTestUtil.waitReplication(fs, file1, factor);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
    // Make sure filesystem is in healthy state
    outStr = runFsck(conf, 0, true, "/");
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
    // corrupt replicas
    File blockFile = cluster.getBlockFile(0, block);
    if (blockFile != null && blockFile.exists()) {
        RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
        FileChannel channel = raFile.getChannel();
        String badString = "BADBAD";
        int rand = random.nextInt((int) channel.size() / 2);
        raFile.seek(rand);
        raFile.write(badString.getBytes());
        raFile.close();
    }
    // Read the file to trigger reportBadBlocks
    try {
        IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true);
    } catch (IOException ie) {
        // Reading a corrupted replica is expected to fail with a ChecksumException
        assertTrue(ie instanceof ChecksumException);
    }
    dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
    blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
    replicaCount = blocks.get(0).getLocations().length;
    // Poll until the namenode reports the expected number of replica locations
    while (replicaCount != factor) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException ignore) {
        }
        blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
        replicaCount = blocks.get(0).getLocations().length;
    }
    assertTrue(blocks.get(0).isCorrupt());
    // Check if fsck reports the same
    outStr = runFsck(conf, 1, true, "/");
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
    assertTrue(outStr.contains("testCorruptBlock"));
}
Also used: DFSClient (org.apache.hadoop.hdfs.DFSClient), Path (org.apache.hadoop.fs.Path), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), FileChannel (java.nio.channels.FileChannel), ChecksumException (org.apache.hadoop.fs.ChecksumException), InetSocketAddress (java.net.InetSocketAddress), LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), Matchers.anyString (org.mockito.Matchers.anyString), IOException (java.io.IOException), IOUtils (org.apache.hadoop.io.IOUtils), Random (java.util.Random), RandomAccessFile (java.io.RandomAccessFile), FileSystem (org.apache.hadoop.fs.FileSystem), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), File (java.io.File), Test (org.junit.Test)
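
The corruption recipe above, seeking into the raw block file and overwriting a few bytes, also works against Hadoop's checksummed local filesystem, which gives a cluster-free way to provoke the same ChecksumException. A sketch assuming hadoop-common on the classpath; the /tmp path is illustrative:

import java.io.RandomAccessFile;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class LocalChecksumDemo {
    public static void main(String[] args) throws Exception {
        // LocalFileSystem is checksummed: create() also writes a hidden .crc sidecar
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path file = new Path("/tmp/checksum-demo");
        try (FSDataOutputStream out = fs.create(file, true)) {
            out.write(new byte[1024]);
        }
        // Corrupt the raw data file behind the filesystem's back so the
        // stored checksum no longer matches the data
        try (RandomAccessFile raf = new RandomAccessFile("/tmp/checksum-demo", "rw")) {
            raf.seek(100);
            raf.write("BADBAD".getBytes());
        }
        try {
            IOUtils.copyBytes(fs.open(file), System.out, 4096, true);
        } catch (ChecksumException ce) {
            System.err.println("expected checksum failure at offset " + ce.getPos());
        }
    }
}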

Example 4 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestPread, method testHedgedReadLoopTooManyTimes:

@Test
public void testHedgedReadLoopTooManyTimes() throws IOException {
    Configuration conf = new Configuration();
    int numHedgedReadPoolThreads = 5;
    final int hedgedReadTimeoutMillis = 50;
    conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, numHedgedReadPoolThreads);
    conf.setLong(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, hedgedReadTimeoutMillis);
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 0);
    // Set up the InjectionHandler
    DFSClientFaultInjector.set(Mockito.mock(DFSClientFaultInjector.class));
    DFSClientFaultInjector injector = DFSClientFaultInjector.get();
    final int sleepMs = 100;
    Mockito.doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            // Outlive the hedged-read threshold, then fail exactly one fetch
            // with a checksum error
            Thread.sleep(hedgedReadTimeoutMillis + sleepMs);
            if (DFSClientFaultInjector.exceptionNum.compareAndSet(0, 1)) {
                System.out.println("-------------- throw Checksum Exception");
                throw new ChecksumException("ChecksumException test", 100);
            }
            return null;
        }
    }).when(injector).fetchFromDatanodeException();
    Mockito.doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            // Delay every datanode read so hedged requests are actually triggered
            Thread.sleep(sleepMs * 2);
            return null;
        }
    }).when(injector).readFromDatanodeDelay();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).build();
    DistributedFileSystem fileSys = cluster.getFileSystem();
    DFSClient dfsClient = fileSys.getClient();
    FSDataOutputStream output = null;
    DFSInputStream input = null;
    String filename = "/hedgedReadMaxOut.dat";
    try {
        Path file = new Path(filename);
        output = fileSys.create(file, (short) 2);
        byte[] data = new byte[64 * 1024];
        output.write(data);
        output.flush();
        output.write(data);
        output.flush();
        output.write(data);
        output.flush();
        output.close();
        byte[] buffer = new byte[64 * 1024];
        input = dfsClient.open(filename);
        input.read(0, buffer, 0, 1024);
        input.close();
        assertEquals(3, input.getHedgedReadOpsLoopNumForTesting());
    } catch (BlockMissingException e) {
        // A hedged read that exhausts every datanode would surface here; fail the test
        assertTrue(false);
    } finally {
        Mockito.reset(injector);
        IOUtils.cleanup(null, input);
        IOUtils.cleanup(null, output);
        fileSys.close();
        cluster.shutdown();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ChecksumException (org.apache.hadoop.fs.ChecksumException), InvocationOnMock (org.mockito.invocation.InvocationOnMock), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Test (org.junit.Test)
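
The two anonymous Answer classes are pre-Java 8 style; since Answer has a single abstract method, the same fault injection can be written with lambdas. A sketch of the equivalent fragment, where injector, sleepMs, and hedgedReadTimeoutMillis refer to the test above:

Mockito.doAnswer(invocation -> {
    // Outlive the hedge threshold, then fail exactly one fetch with a checksum error
    Thread.sleep(hedgedReadTimeoutMillis + sleepMs);
    if (DFSClientFaultInjector.exceptionNum.compareAndSet(0, 1)) {
        throw new ChecksumException("ChecksumException test", 100);
    }
    return null;
}).when(injector).fetchFromDatanodeException();

Mockito.doAnswer(invocation -> {
    // Delay every datanode read so hedged requests are actually triggered
    Thread.sleep(sleepMs * 2);
    return null;
}).when(injector).readFromDatanodeDelay();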

Example 5 with ChecksumException

Use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.

From the class TestDFSAdmin, method testReportCommand:

@Test(timeout = 120000)
public void testReportCommand() throws Exception {
    redirectStream();
    /* init conf */
    final Configuration dfsConf = new HdfsConfiguration();
    dfsConf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500); // 0.5s
    dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
    final Path baseDir = new Path(PathUtils.getTestDir(getClass()).getAbsolutePath(), GenericTestUtils.getMethodName());
    dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.toString());
    final int numDn = 3;
    /* init cluster */
    try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(dfsConf).numDataNodes(numDn).build()) {
        miniCluster.waitActive();
        assertEquals(numDn, miniCluster.getDataNodes().size());
        /* local vars */
        final DFSAdmin dfsAdmin = new DFSAdmin(dfsConf);
        final DFSClient client = miniCluster.getFileSystem().getClient();
        /* run and verify report command */
        resetStream();
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn, 0, client);
        /* shut down one DN */
        final List<DataNode> datanodes = miniCluster.getDataNodes();
        final DataNode last = datanodes.get(datanodes.size() - 1);
        last.shutdown();
        miniCluster.setDataNodeDead(last.getDatanodeId());
        /* run and verify report command */
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, client);
        /* corrupt one block */
        final short replFactor = 1;
        final long fileLength = 512L;
        final FileSystem fs = miniCluster.getFileSystem();
        final Path file = new Path(baseDir, "/corrupted");
        DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L);
        DFSTestUtil.waitReplication(fs, file, replFactor);
        final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);
        final int blockFilesCorrupted = miniCluster.corruptBlockOnDataNodes(block);
        assertEquals("Fail to corrupt all replicas for block " + block, replFactor, blockFilesCorrupted);
        try {
            IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), conf, true);
            fail("Should have failed to read the file with corrupted blocks.");
        } catch (ChecksumException ignored) {
        // expected exception reading corrupt blocks
        }
        /*
         * Increase the replication factor; this should trigger a transfer request.
         * The receiving datanode fails on checksum and reports the block to the namenode.
         */
        fs.setReplication(file, (short) (replFactor + 1));
        /* get block details and check if the block is corrupt */
        GenericTestUtils.waitFor(new Supplier<Boolean>() {

            @Override
            public Boolean get() {
                LocatedBlocks blocks = null;
                try {
                    miniCluster.triggerBlockReports();
                    blocks = client.getNamenode().getBlockLocations(file.toString(), 0, Long.MAX_VALUE);
                } catch (IOException e) {
                    return false;
                }
                return blocks != null && blocks.get(0).isCorrupt();
            }
        }, 1000, 60000);
        BlockManagerTestUtil.updateState(miniCluster.getNameNode().getNamesystem().getBlockManager());
        /* run and verify report command */
        resetStream();
        assertEquals(0, ToolRunner.run(dfsAdmin, new String[] { "-report" }));
        verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, client);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DFSClient (org.apache.hadoop.hdfs.DFSClient), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), ChecksumException (org.apache.hadoop.fs.ChecksumException), StrBuilder (org.apache.commons.lang.text.StrBuilder), LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks), ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), IOException (java.io.IOException), IOUtils (org.apache.hadoop.io.IOUtils), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), FileSystem (org.apache.hadoop.fs.FileSystem), Test (org.junit.Test)
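
GenericTestUtils.waitFor(check, intervalMs, timeoutMs), used above, simply polls the Supplier<Boolean> until it returns true or the timeout expires. A minimal self-contained approximation of that loop (simplified; the real helper also builds a diagnostic message on timeout):

import java.util.function.Supplier;

public final class WaitFor {
    // Poll 'check' every intervalMs until it returns true or timeoutMs elapses
    static void waitFor(Supplier<Boolean> check, long intervalMs, long timeoutMs)
            throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!check.get()) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("condition not met within " + timeoutMs + " ms");
            }
            Thread.sleep(intervalMs);
        }
    }

    public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        // Example: wait until at least 200 ms have passed
        waitFor(() -> System.currentTimeMillis() - start >= 200, 50, 5000);
        System.out.println("condition met");
    }
}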

Aggregations

ChecksumException (org.apache.hadoop.fs.ChecksumException): 20
Path (org.apache.hadoop.fs.Path): 12
Test (org.junit.Test): 12
IOException (java.io.IOException): 9
Configuration (org.apache.hadoop.conf.Configuration): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 7
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 5
File (java.io.File): 4
RandomAccessFile (java.io.RandomAccessFile): 4
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 4
DataOutputStream (java.io.DataOutputStream): 3
InterruptedIOException (java.io.InterruptedIOException): 3
InetSocketAddress (java.net.InetSocketAddress): 3
ByteBuffer (java.nio.ByteBuffer): 3
DFSClient (org.apache.hadoop.hdfs.DFSClient): 3
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 3
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 3