
Example 1 with HdfsBlockLocation

Use of org.apache.hadoop.fs.HdfsBlockLocation in project hadoop by apache.

From the class TestFsDatasetCache, method testCacheAndUncacheBlock.

private void testCacheAndUncacheBlock() throws Exception {
    LOG.info("beginning testCacheAndUncacheBlock");
    final int NUM_BLOCKS = 5;
    DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
    assertEquals(0, fsd.getNumBlocksCached());
    // Write a test file
    final Path testFile = new Path("/testCacheBlock");
    final long testFileLen = BLOCK_SIZE * NUM_BLOCKS;
    DFSTestUtil.createFile(fs, testFile, testFileLen, (short) 1, 0xABBAL);
    // Get the details of the written file
    HdfsBlockLocation[] locs = (HdfsBlockLocation[]) fs.getFileBlockLocations(testFile, 0, testFileLen);
    assertEquals("Unexpected number of blocks", NUM_BLOCKS, locs.length);
    final long[] blockSizes = getBlockSizes(locs);
    // Check initial state
    final long cacheCapacity = fsd.getCacheCapacity();
    long cacheUsed = fsd.getCacheUsed();
    long current = 0;
    assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
    assertEquals("Unexpected amount of cache used", current, cacheUsed);
    MetricsRecordBuilder dnMetrics;
    long numCacheCommands = 0;
    long numUncacheCommands = 0;
    // Cache each block in succession, checking each time
    for (int i = 0; i < NUM_BLOCKS; i++) {
        setHeartbeatResponse(cacheBlock(locs[i]));
        current = DFSTestUtil.verifyExpectedCacheUsage(current + blockSizes[i], i + 1, fsd);
        dnMetrics = getMetrics(dn.getMetrics().name());
        long cmds = MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
        assertTrue("Expected more cache requests from the NN (" + cmds + " <= " + numCacheCommands + ")", cmds > numCacheCommands);
        numCacheCommands = cmds;
    }
    // Uncache each block in succession, again checking each time
    for (int i = 0; i < NUM_BLOCKS; i++) {
        setHeartbeatResponse(uncacheBlock(locs[i]));
        current = DFSTestUtil.verifyExpectedCacheUsage(current - blockSizes[i], NUM_BLOCKS - 1 - i, fsd);
        dnMetrics = getMetrics(dn.getMetrics().name());
        long cmds = MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
        assertTrue("Expected more uncache requests from the NN", cmds > numUncacheCommands);
        numUncacheCommands = cmds;
    }
    LOG.info("finishing testCacheAndUncacheBlock");
}
Also used: Path (org.apache.hadoop.fs.Path), HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation), MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder)
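
The getBlockSizes helper referenced above is not shown on this page. Judging from the FileInputStream and FileChannel entries in the Aggregations list below, the real helper measures the on-disk replica; a simpler sketch that just sums the sizes the NameNode reported would be:

private static long[] getBlockSizes(HdfsBlockLocation[] locs) {
    // Sketch only: assumes the NameNode-reported block size is sufficient.
    // Each HdfsBlockLocation wraps the LocatedBlock it was built from, so
    // the size is available without touching the DataNode's block files.
    long[] sizes = new long[locs.length];
    for (int i = 0; i < locs.length; i++) {
        sizes[i] = locs[i].getLocatedBlock().getBlockSize();
    }
    return sizes;
}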

Example 2 with HdfsBlockLocation

Use of org.apache.hadoop.fs.HdfsBlockLocation in project hadoop by apache.

From the class TestFsDatasetCache, method testUncachingBlocksBeforeCachingFinishes.

@Test(timeout = 600000)
public void testUncachingBlocksBeforeCachingFinishes() throws Exception {
    LOG.info("beginning testUncachingBlocksBeforeCachingFinishes");
    final int NUM_BLOCKS = 5;
    DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
    // Write a test file
    final Path testFile = new Path("/testCacheBlock");
    final long testFileLen = BLOCK_SIZE * NUM_BLOCKS;
    DFSTestUtil.createFile(fs, testFile, testFileLen, (short) 1, 0xABBAL);
    // Get the details of the written file
    HdfsBlockLocation[] locs = (HdfsBlockLocation[]) fs.getFileBlockLocations(testFile, 0, testFileLen);
    assertEquals("Unexpected number of blocks", NUM_BLOCKS, locs.length);
    final long[] blockSizes = getBlockSizes(locs);
    // Check initial state
    final long cacheCapacity = fsd.getCacheCapacity();
    long cacheUsed = fsd.getCacheUsed();
    long current = 0;
    assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
    assertEquals("Unexpected amount of cache used", current, cacheUsed);
    NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator() {

        @Override
        public void mlock(String identifier, ByteBuffer mmap, long length) throws IOException {
            LOG.info("An mlock operation is starting on " + identifier);
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                Assert.fail();
            }
        }
    });
    // Start caching each block in succession. The usedBytes amount
    // should increase, even though caching doesn't complete on any of them.
    for (int i = 0; i < NUM_BLOCKS; i++) {
        setHeartbeatResponse(cacheBlock(locs[i]));
        current = DFSTestUtil.verifyExpectedCacheUsage(current + blockSizes[i], i + 1, fsd);
    }
    setHeartbeatResponse(new DatanodeCommand[] { getResponse(locs, DatanodeProtocol.DNA_UNCACHE) });
    // Wait until all the caching jobs have finished cancelling.
    current = DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
    LOG.info("finishing testUncachingBlocksBeforeCachingFinishes");
}
Also used: Path (org.apache.hadoop.fs.Path), NoMlockCacheManipulator (org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator), HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), Test (org.junit.Test)
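
The NoMlockCacheManipulator subclassed above replaces the native mlock with a no-op, which is what lets the test inject a three-second delay per block without OS privileges or a real locked-memory limit. For the rest of the suite, a plain no-op manipulator would typically be installed up front; a sketch of that setup (assumed, not shown on this page):

// Assumed test setup: make mlock a no-op so caching needs no privileges.
// The example above then overrides mlock on top of this to add the delay.
NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());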

Example 3 with HdfsBlockLocation

Use of org.apache.hadoop.fs.HdfsBlockLocation in project hadoop by apache.

From the class DFSClient, method getBlockLocations.

/**
 * Get block location info about a file.
 *
 * getBlockLocations() returns a list of hostnames that store
 * data for a specific file region. It returns a set of hostnames
 * for every block within the indicated region.
 *
 * This function is very useful when writing code that considers
 * data placement when performing operations. For example, the
 * MapReduce system tries to schedule tasks on the same machines
 * as the data blocks the tasks process.
 */
public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException {
    checkOpen();
    try (TraceScope ignored = newPathTraceScope("getBlockLocations", src)) {
        LocatedBlocks blocks = getLocatedBlocks(src, start, length);
        BlockLocation[] locations = DFSUtilClient.locatedBlocks2Locations(blocks);
        HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
        for (int i = 0; i < locations.length; i++) {
            hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
        }
        return hdfsLocations;
    }
}
Also used: HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation), LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks), TraceScope (org.apache.htrace.core.TraceScope), BlockLocation (org.apache.hadoop.fs.BlockLocation)
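
Because this method builds the array as HdfsBlockLocation[], a caller can downcast each element to reach the underlying LocatedBlock, exactly as the test examples above do. A minimal caller sketch (the path is illustrative, and the downcast is only an assumption that holds when the file system is HDFS):

// Sketch: list block locations for a file and inspect the wrapped
// LocatedBlock. The cast is only safe when fs is an HDFS file system.
FileSystem fs = FileSystem.get(new Configuration());
Path file = new Path("/some/file");
long len = fs.getFileStatus(file).getLen();
BlockLocation[] locs = fs.getFileBlockLocations(file, 0, len);
for (BlockLocation loc : locs) {
    LocatedBlock lb = ((HdfsBlockLocation) loc).getLocatedBlock();
    System.out.println(lb.getBlock() + " on " + String.join(",", loc.getHosts()));
}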

Example 4 with HdfsBlockLocation

Use of org.apache.hadoop.fs.HdfsBlockLocation in project hadoop by apache.

From the class TestFsDatasetCache, method testFilesExceedMaxLockedMemory.

@Test(timeout = 600000)
public void testFilesExceedMaxLockedMemory() throws Exception {
    LOG.info("beginning testFilesExceedMaxLockedMemory");
    // Create some test files that will exceed total cache capacity
    final int numFiles = 5;
    final long fileSize = CACHE_CAPACITY / (numFiles - 1);
    final Path[] testFiles = new Path[numFiles];
    final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
    final long[] fileSizes = new long[numFiles];
    for (int i = 0; i < numFiles; i++) {
        testFiles[i] = new Path("/testFilesExceedMaxLockedMemory-" + i);
        DFSTestUtil.createFile(fs, testFiles[i], fileSize, (short) 1, 0xDFAL);
        fileLocs[i] = (HdfsBlockLocation[]) fs.getFileBlockLocations(testFiles[i], 0, fileSize);
        // Get the file size (sum of blocks)
        long[] sizes = getBlockSizes(fileLocs[i]);
        for (int j = 0; j < sizes.length; j++) {
            fileSizes[i] += sizes[j];
        }
    }
    // Cache the first n-1 files
    long total = 0;
    DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
    for (int i = 0; i < numFiles - 1; i++) {
        setHeartbeatResponse(cacheBlocks(fileLocs[i]));
        total = DFSTestUtil.verifyExpectedCacheUsage(rounder.roundUp(total + fileSizes[i]), 4 * (i + 1), fsd);
    }
    // nth file should hit a capacity exception
    final LogVerificationAppender appender = new LogVerificationAppender();
    final Logger logger = Logger.getRootLogger();
    logger.addAppender(appender);
    setHeartbeatResponse(cacheBlocks(fileLocs[numFiles - 1]));
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            int lines = appender.countLinesWithMessage("more bytes in the cache: " + DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
            return lines > 0;
        }
    }, 500, 30000);
    // Also check the metrics for the failure
    assertTrue("Expected more than 0 failed cache attempts", fsd.getNumBlocksFailedToCache() > 0);
    // Uncache the first n-1 files
    // 16 = 4 cached files * 4 blocks per file
    int curCachedBlocks = 16;
    for (int i = 0; i < numFiles - 1; i++) {
        setHeartbeatResponse(uncacheBlocks(fileLocs[i]));
        long uncachedBytes = rounder.roundUp(fileSizes[i]);
        total -= uncachedBytes;
        curCachedBlocks -= uncachedBytes / BLOCK_SIZE;
        DFSTestUtil.verifyExpectedCacheUsage(total, curCachedBlocks, fsd);
    }
    LOG.info("finishing testFilesExceedMaxLockedMemory");
}
Also used: Path (org.apache.hadoop.fs.Path), LogVerificationAppender (org.apache.hadoop.hdfs.LogVerificationAppender), HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation), Logger (org.apache.log4j.Logger), Matchers.anyBoolean (org.mockito.Matchers.anyBoolean), Test (org.junit.Test)
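
The rounder.roundUp calls reflect that mmap-based caching is accounted in whole OS pages: each cached replica's usage is rounded up to a multiple of the page size. A minimal equivalent of that rounding (a sketch; the helper name is illustrative, and the test's own rounder is not shown on this page):

// Sketch: round a byte count up to the next multiple of the OS page size,
// mirroring what rounder.roundUp appears to do in the assertions above.
static long roundUpToPageSize(long bytes) {
    long pageSize = NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize();
    return ((bytes + pageSize - 1) / pageSize) * pageSize;
}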

Example 5 with HdfsBlockLocation

Use of org.apache.hadoop.fs.HdfsBlockLocation in project hadoop by apache.

From the class TestFsDatasetCache, method testUncacheUnknownBlock.

@Test(timeout = 60000)
public void testUncacheUnknownBlock() throws Exception {
    // Create a file
    Path fileName = new Path("/testUncacheUnknownBlock");
    int fileLen = 4096;
    DFSTestUtil.createFile(fs, fileName, fileLen, (short) 1, 0xFDFD);
    HdfsBlockLocation[] locs = (HdfsBlockLocation[]) fs.getFileBlockLocations(fileName, 0, fileLen);
    // Try to uncache it without caching it first
    setHeartbeatResponse(uncacheBlocks(locs));
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            return fsd.getNumBlocksFailedToUncache() > 0;
        }
    }, 100, 10000);
}
Also used: Path (org.apache.hadoop.fs.Path), HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation), Matchers.anyBoolean (org.mockito.Matchers.anyBoolean), Test (org.junit.Test)
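
On Java 8 and later, the anonymous Supplier in this example can be collapsed into a lambda, since the Supplier type accepted by GenericTestUtils.waitFor is a functional interface:

// Equivalent polling check as a lambda: retry every 100 ms, give up
// after 10 seconds.
GenericTestUtils.waitFor(() -> fsd.getNumBlocksFailedToUncache() > 0, 100, 10000);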

Aggregations

HdfsBlockLocation (org.apache.hadoop.fs.HdfsBlockLocation): 7
Path (org.apache.hadoop.fs.Path): 5
Test (org.junit.Test): 4
Matchers.anyBoolean (org.mockito.Matchers.anyBoolean): 2
FileInputStream (java.io.FileInputStream): 1
IOException (java.io.IOException): 1
ByteBuffer (java.nio.ByteBuffer): 1
FileChannel (java.nio.channels.FileChannel): 1
BlockLocation (org.apache.hadoop.fs.BlockLocation): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1
LogVerificationAppender (org.apache.hadoop.hdfs.LogVerificationAppender): 1
Block (org.apache.hadoop.hdfs.protocol.Block): 1
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 1
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 1
NoMlockCacheManipulator (org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator): 1
MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder): 1
TraceScope (org.apache.htrace.core.TraceScope): 1
Logger (org.apache.log4j.Logger): 1