use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
the class TestListCorruptFileBlocks method testMaxCorruptFiles.
/**
 * Test if NN.listCorruptFileBlocks() returns the right number of results.
 * The corrupt blocks are detected by the BlockPoolSliceScanner.
 * Also, test that DFS.listCorruptFileBlocks can make multiple successive
 * calls.
 */
@Test(timeout = 300000)
public void testMaxCorruptFiles() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    // have the datanode send block reports frequently
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
    cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    final int maxCorruptFileBlocks =
        FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
    // create 3 * maxCorruptFileBlocks files with one block each
    DFSTestUtil util = new DFSTestUtil.Builder()
        .setName("testMaxCorruptFiles")
        .setNumFiles(maxCorruptFileBlocks * 3)
        .setMaxLevels(1)
        .setMaxSize(512)
        .build();
    util.createFiles(fs, "/srcdat2", (short) 1);
    util.waitReplication(fs, "/srcdat2", (short) 1);
    // verify that there are no bad blocks
    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles =
        namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    assertTrue("Namenode has " + badFiles.size()
        + " corrupt files. Expecting none.", badFiles.size() == 0);
    // now deliberately remove blocks from all files
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i = 0; i < 4; i++) {
      for (int j = 0; j <= 1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        LOG.info("Removing files from " + dataDir);
        List<File> metadataFiles =
            MiniDFSCluster.getAllBlockMetadataFiles(dataDir);
        if (metadataFiles == null) {
          continue;
        }
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }
    // run the DirectoryScanner to update the DataNode's volumeMap
    DataNode dn = cluster.getDataNodes().get(0);
    DataNodeTestUtils.runDirectoryScanner(dn);
    // Occasionally the BlockPoolSliceScanner can run before we have removed
    // the blocks. Restart the DataNode to make the scanner run once more.
    LOG.info("Restarting Datanode to trigger BlockPoolSliceScanner");
    cluster.restartDataNodes();
    cluster.waitActive();
    badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    while (badFiles.size() < maxCorruptFileBlocks) {
      LOG.info("# of corrupt files is: " + badFiles.size());
      Thread.sleep(10000);
      badFiles =
          namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    }
    badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting "
        + maxCorruptFileBlocks + ".",
        badFiles.size() == maxCorruptFileBlocks);
    CorruptFileBlockIterator iter = (CorruptFileBlockIterator)
        fs.listCorruptFileBlocks(new Path("/srcdat2"));
    int corruptPaths = countPaths(iter);
    assertTrue("Expected more than " + maxCorruptFileBlocks
        + " corrupt file blocks but got " + corruptPaths,
        corruptPaths > maxCorruptFileBlocks);
    assertTrue("Iterator should have made more than 1 call but made "
        + iter.getCallsMade(), iter.getCallsMade() > 1);
    util.cleanup(fs, "/srcdat2");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
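The delete-blocks loop above reappears almost verbatim in testFsckListCorruptSnapshotFiles further down. A hedged sketch of that step factored into a standalone helper; the class and method names here are our own invention, not Hadoop API, but every MiniDFSCluster call is exactly the one the test uses:

import java.io.File;
import java.util.List;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;

// Hypothetical helper (not part of Hadoop): delete every finalized block
// and its metadata file across a MiniDFSCluster's storage directories.
public final class BlockCorruptionUtil {
  private BlockCorruptionUtil() {}

  public static int corruptAllFinalizedBlocks(MiniDFSCluster cluster,
      int numDataNodes) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    int deleted = 0;
    for (int dn = 0; dn < numDataNodes; dn++) {
      // a MiniDFSCluster datanode has two storage dirs by default
      for (int vol = 0; vol <= 1; vol++) {
        File storageDir = cluster.getInstanceStorageDir(dn, vol);
        File finalizedDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        List<File> metaFiles =
            MiniDFSCluster.getAllBlockMetadataFiles(finalizedDir);
        if (metaFiles == null) {
          continue;
        }
        for (File meta : metaFiles) {
          // the block file sits next to its .meta file
          File blockFile = Block.metaToBlockFile(meta);
          if (blockFile.delete() && meta.delete()) {
            deleted++;
          }
        }
      }
    }
    return deleted;
  }
}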
use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
the class TestDatanodeRestart method testFinalizedReplicas.
// test finalized replicas persist across DataNode restarts
@Test
public void testFinalizedReplicas() throws Exception {
  // bring up a cluster of 3
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024L);
  conf.setInt(HdfsClientConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY, 512);
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  try {
    // test finalized replicas
    final String topDir = "/test";
    DFSTestUtil util = new DFSTestUtil.Builder()
        .setName("TestDatanodeRestart").setNumFiles(2).build();
    util.createFiles(fs, topDir, (short) 3);
    util.waitReplication(fs, topDir, (short) 3);
    util.checkFiles(fs, topDir);
    cluster.restartDataNodes();
    cluster.waitActive();
    util.checkFiles(fs, topDir);
  } finally {
    cluster.shutdown();
  }
}
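Every example on this page drives DFSTestUtil through the same create/verify/cleanup lifecycle. A condensed sketch of just that lifecycle, assuming fs comes from an already-running MiniDFSCluster; the builder options shown are the ones used in the surrounding tests:

// Minimal DFSTestUtil lifecycle (assumes a running MiniDFSCluster).
DFSTestUtil util = new DFSTestUtil.Builder()
    .setName("lifecycleDemo")  // prefix for the generated test files
    .setNumFiles(2)            // number of files to generate
    .setMaxLevels(1)           // maximum directory nesting depth
    .setMaxSize(512)           // maximum bytes per file
    .build();
util.createFiles(fs, "/demo", (short) 3);      // write with replication 3
util.waitReplication(fs, "/demo", (short) 3);  // block until fully replicated
util.checkFiles(fs, "/demo");                  // re-read and verify contents
util.cleanup(fs, "/demo");                     // delete everything created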
use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
the class TestFsck method testFsckOpenECFiles.
@Test
public void testFsckOpenECFiles() throws Exception {
  DFSTestUtil util = new DFSTestUtil.Builder()
      .setName("TestFsckECFile").setNumFiles(4).build();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
  ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy();
  final int dataBlocks = ecPolicy.getNumDataUnits();
  final int cellSize = ecPolicy.getCellSize();
  final int numAllUnits = dataBlocks + ecPolicy.getNumParityUnits();
  int blockSize = 2 * cellSize;
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
      ecPolicy.getName());
  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(numAllUnits + 1).build();
  String topDir = "/myDir";
  cluster.waitActive();
  DistributedFileSystem fs = cluster.getFileSystem();
  util.createFiles(fs, topDir);
  // set topDir to EC while it still holds replicated files
  cluster.getFileSystem().getClient()
      .setErasureCodingPolicy(topDir, ecPolicy.getName());
  // create a new file under topDir
  DFSTestUtil.createFile(fs, new Path(topDir, "ecFile"), 1024, (short) 1, 0L);
  // open an EC file for writing and do not close it for now
  Path openFile = new Path(topDir + "/openECFile");
  FSDataOutputStream out = fs.create(openFile);
  int blockGroupSize = dataBlocks * blockSize;
  // data size is more than 1 block group and less than 2 block groups
  byte[] randomBytes = new byte[2 * blockGroupSize - cellSize];
  int seed = 42;
  new Random(seed).nextBytes(randomBytes);
  out.write(randomBytes);
  // make sure fsck can correctly handle mixed ec/replicated files
  runFsck(conf, 0, true, topDir, "-files", "-blocks", "-openforwrite");
  // we expect the filesystem to be HEALTHY and show one open file
  String outStr = runFsck(conf, 0, true, openFile.toString(),
      "-files", "-blocks", "-openforwrite");
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  assertTrue(outStr.contains("OPENFORWRITE"));
  assertTrue(outStr.contains("Live_repl=" + numAllUnits));
  assertTrue(outStr.contains("Expected_repl=" + numAllUnits));
  // use the -openforwrite option to list open files
  outStr = runFsck(conf, 0, true, openFile.toString(), "-files", "-blocks",
      "-locations", "-openforwrite", "-replicaDetails");
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  assertTrue(outStr.contains("OPENFORWRITE"));
  assertTrue(outStr.contains("Live_repl=" + numAllUnits));
  assertTrue(outStr.contains("Expected_repl=" + numAllUnits));
  assertTrue(outStr.contains("Under Construction Block:"));
  // check the reported blockIDs of the internal blocks
  LocatedStripedBlock lsb = (LocatedStripedBlock) fs.getClient()
      .getLocatedBlocks(openFile.toString(), 0, cellSize * dataBlocks).get(0);
  long groupId = lsb.getBlock().getBlockId();
  byte[] indices = lsb.getBlockIndices();
  DatanodeInfo[] locs = lsb.getLocations();
  long blockId;
  for (int i = 0; i < indices.length; i++) {
    blockId = groupId + indices[i];
    String str = "blk_" + blockId + ":" + locs[i];
    assertTrue(outStr.contains(str));
  }
  // check that the output for under-construction blocks doesn't include
  // the blockIDs
  String regex = ".*Expected_repl=" + numAllUnits + "(.*)\nStatus:.*";
  Pattern p = Pattern.compile(regex, Pattern.DOTALL);
  Matcher m = p.matcher(outStr);
  assertTrue(m.find());
  String ucBlockOutput = m.group(1);
  assertFalse(ucBlockOutput.contains("blk_"));
  // close the file
  out.close();
  // now fsck should show a HEALTHY filesystem and no open files
  outStr = runFsck(conf, 0, true, openFile.toString(), "-files", "-blocks",
      "-locations", "-racks", "-replicaDetails");
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  assertFalse(outStr.contains("OPENFORWRITE"));
  assertFalse(outStr.contains("Under Construction Block:"));
  assertFalse(outStr.contains("Expected_repl=" + numAllUnits));
  assertTrue(outStr.contains("Live_repl=" + numAllUnits));
  util.cleanup(fs, topDir);
}
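The blockID loop above encodes the striped-block ID scheme: each internal block of an erasure-coded block group has an ID equal to the group ID plus its index within the group. A small hedged helper isolating that computation; the method name is our own, not Hadoop API, and it assumes the test class's imports plus java.util.ArrayList:

// Hypothetical helper (not part of Hadoop): expected "blk_<id>:<location>"
// strings for every internal block of a striped block group.
static List<String> expectedInternalBlockStrings(LocatedStripedBlock lsb) {
  long groupId = lsb.getBlock().getBlockId();
  byte[] indices = lsb.getBlockIndices();
  DatanodeInfo[] locs = lsb.getLocations();
  List<String> expected = new ArrayList<>();
  for (int i = 0; i < indices.length; i++) {
    // internal block ID = block group ID + index within the group
    expected.add("blk_" + (groupId + indices[i]) + ":" + locs[i]);
  }
  return expected;
}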
use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
the class TestFsck method testFsckSymlink.
/** Test fsck with symlinks in the filesystem. */
@Test
public void testFsckSymlink() throws Exception {
  final DFSTestUtil util = new DFSTestUtil.Builder()
      .setName(getClass().getSimpleName()).setNumFiles(1).build();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
  FileSystem fs = null;
  final long precision = 1L;
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, precision);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  fs = cluster.getFileSystem();
  final String fileName = "/srcdat";
  util.createFiles(fs, fileName);
  final FileContext fc =
      FileContext.getFileContext(cluster.getConfiguration(0));
  final Path file = new Path(fileName);
  final Path symlink = new Path("/srcdat-symlink");
  fc.createSymlink(file, symlink, false);
  util.waitReplication(fs, fileName, (short) 3);
  long aTime = fc.getFileStatus(symlink).getAccessTime();
  Thread.sleep(precision);
  setupAuditLogs();
  String outStr = runFsck(conf, 0, true, "/");
  verifyAuditLogs();
  assertEquals(aTime, fc.getFileStatus(symlink).getAccessTime());
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  assertTrue(outStr.contains("Total symlinks:\t\t1"));
  util.cleanup(fs, fileName);
}
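Note that the symlink is created through FileContext rather than through the FileSystem handle used for everything else; that is how this test exercises HDFS symlinks. A minimal sketch of just the symlink step, assuming a running MiniDFSCluster as above (paths illustrative):

// Create and inspect an HDFS symlink via FileContext.
FileContext fc = FileContext.getFileContext(cluster.getConfiguration(0));
Path target = new Path("/srcdat");
Path link = new Path("/srcdat-symlink");
fc.createSymlink(target, link, false /* createParent */);
// getFileLinkStatus() reports on the link itself, not its target
assertTrue(fc.getFileLinkStatus(link).isSymlink());
// fsck then counts it in its summary line: "Total symlinks: 1"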
use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
the class TestFsck method testFsckListCorruptSnapshotFiles.
/**
 * Test that corrupted snapshot files are listed with their full path.
 */
@Test
public void testFsckListCorruptSnapshotFiles() throws Exception {
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
  DistributedFileSystem hdfs = null;
  final short replFactor = 1;
  int numFiles = 3;
  int numSnapshots = 0;
  cluster = new MiniDFSCluster.Builder(conf).build();
  cluster.waitActive();
  hdfs = cluster.getFileSystem();
  DFSTestUtil util = new DFSTestUtil.Builder()
      .setName("testGetCorruptFiles")
      .setNumFiles(numFiles)
      .setMaxLevels(1)
      .setMaxSize(1024)
      .build();
  util.createFiles(hdfs, "/corruptData", (short) 1);
  final Path fp = new Path("/corruptData/file");
  util.createFile(hdfs, fp, 1024, replFactor, 1000L);
  numFiles++;
  util.waitReplication(hdfs, "/corruptData", (short) 1);
  hdfs.allowSnapshot(new Path("/corruptData"));
  hdfs.createSnapshot(new Path("/corruptData"), "mySnapShot");
  numSnapshots = numFiles;
  String outStr =
      runFsck(conf, 0, false, "/corruptData", "-list-corruptfileblocks");
  System.out.println("1. good fsck out: " + outStr);
  assertTrue(outStr.contains("has 0 CORRUPT files"));
  // delete the blocks
  final String bpid = cluster.getNamesystem().getBlockPoolId();
  for (int i = 0; i < numFiles; i++) {
    for (int j = 0; j <= 1; j++) {
      File storageDir = cluster.getInstanceStorageDir(i, j);
      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      List<File> metadataFiles =
          MiniDFSCluster.getAllBlockMetadataFiles(dataDir);
      if (metadataFiles == null) {
        continue;
      }
      for (File metadataFile : metadataFiles) {
        File blockFile = Block.metaToBlockFile(metadataFile);
        assertTrue("Cannot remove file.", blockFile.delete());
        assertTrue("Cannot remove file.", metadataFile.delete());
      }
    }
  }
  // delete the file while it has a snapshot
  hdfs.delete(fp, false);
  numFiles--;
  // wait for the namenode to see the corruption
  final NamenodeProtocols namenode = cluster.getNameNodeRpc();
  CorruptFileBlocks corruptFileBlocks =
      namenode.listCorruptFileBlocks("/corruptData", null);
  int numCorrupt = corruptFileBlocks.getFiles().length;
  while (numCorrupt == 0) {
    Thread.sleep(1000);
    corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData", null);
    numCorrupt = corruptFileBlocks.getFiles().length;
  }
  // with -includeSnapshots all files are reported
  outStr = runFsck(conf, -1, true, "/corruptData",
      "-list-corruptfileblocks", "-includeSnapshots");
  System.out.println("2. bad fsck include snapshot out: " + outStr);
  assertTrue(outStr
      .contains("has " + (numFiles + numSnapshots) + " CORRUPT files"));
  assertTrue(outStr.contains("/.snapshot/"));
  // without -includeSnapshots only non-snapshot files are reported
  outStr = runFsck(conf, -1, true, "/corruptData", "-list-corruptfileblocks");
  System.out.println("3. bad fsck exclude snapshot out: " + outStr);
  assertTrue(outStr.contains("has " + numFiles + " CORRUPT files"));
  assertFalse(outStr.contains("/.snapshot/"));
}
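Both this test and testMaxCorruptFiles at the top of the page poll listCorruptFileBlocks in a sleep loop until the NameNode notices the damage. A hedged sketch of that wait factored into a reusable helper; the method name is ours, but the RPC calls are exactly the ones used above:

// Hypothetical polling helper (not part of Hadoop): block until the
// NameNode reports at least `min` corrupt file blocks under `path`.
static int waitForCorruptFileBlocks(NamenodeProtocols namenode, String path,
    int min) throws Exception {
  CorruptFileBlocks cfb = namenode.listCorruptFileBlocks(path, null);
  while (cfb.getFiles().length < min) {
    Thread.sleep(1000);  // corruption surfaces via periodic block reports
    cfb = namenode.listCorruptFileBlocks(path, null);
  }
  return cfb.getFiles().length;
}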