Use of org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache in project hadoop by apache.
The class BlockReaderFactory, method requestFileDescriptors.
/**
* Request file descriptors from a DomainPeer.
*
* @param peer The peer to use for communication.
* @param slot If non-null, the shared memory slot to associate with the
* new ShortCircuitReplica.
*
* @return A ShortCircuitReplica object if we could communicate with the
* datanode; null, otherwise.
* @throws IOException If we encountered an I/O exception while communicating
* with the datanode.
*/
private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer,
    Slot slot) throws IOException {
  ShortCircuitCache cache = clientContext.getShortCircuitCache();
  final DataOutputStream out = new DataOutputStream(
      new BufferedOutputStream(peer.getOutputStream()));
  SlotId slotId = slot == null ? null : slot.getSlotId();
  new Sender(out).requestShortCircuitFds(block, token, slotId, 1,
      failureInjector.getSupportsReceiptVerification());
  DataInputStream in = new DataInputStream(peer.getInputStream());
  BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
      PBHelperClient.vintPrefixed(in));
  DomainSocket sock = peer.getDomainSocket();
  failureInjector.injectRequestFileDescriptorsFailure();
  switch (resp.getStatus()) {
  case SUCCESS:
    byte[] buf = new byte[1];
    FileInputStream[] fis = new FileInputStream[2];
    sock.recvFileInputStreams(fis, buf, 0, buf.length);
    ShortCircuitReplica replica = null;
    try {
      ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(),
          block.getBlockPoolId());
      if (buf[0] == USE_RECEIPT_VERIFICATION.getNumber()) {
        LOG.trace("Sending receipt verification byte for slot {}", slot);
        sock.getOutputStream().write(0);
      }
      replica = new ShortCircuitReplica(key, fis[0], fis[1], cache,
          Time.monotonicNow(), slot);
      return new ShortCircuitReplicaInfo(replica);
    } catch (IOException e) {
      // This indicates an error reading from disk, or a format error. Since
      // it's not a socket communication problem, we return null rather than
      // throwing an exception.
      LOG.warn(this + ": error creating ShortCircuitReplica.", e);
      return null;
    } finally {
      if (replica == null) {
        IOUtilsClient.cleanup(DFSClient.LOG, fis[0], fis[1]);
      }
    }
  case ERROR_UNSUPPORTED:
    if (!resp.hasShortCircuitAccessVersion()) {
      LOG.warn("short-circuit read access is disabled for DataNode "
          + datanode + ". reason: " + resp.getMessage());
      clientContext.getDomainSocketFactory()
          .disableShortCircuitForPath(pathInfo.getPath());
    } else {
      LOG.warn("short-circuit read access for the file " + fileName
          + " is disabled for DataNode " + datanode
          + ". reason: " + resp.getMessage());
    }
    return null;
  case ERROR_ACCESS_TOKEN:
    String msg = "access control error while attempting to set up "
        + "short-circuit access to " + fileName + resp.getMessage();
    LOG.debug("{}:{}", this, msg);
    return new ShortCircuitReplicaInfo(new InvalidToken(msg));
  default:
    LOG.warn(this + ": unknown response code " + resp.getStatus()
        + " while attempting to set up short-circuit access. "
        + resp.getMessage());
    clientContext.getDomainSocketFactory()
        .disableShortCircuitForPath(pathInfo.getPath());
    return null;
  }
}
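The ShortCircuitCache obtained at the top of this method calls back into the factory when it has no usable replica for a block: BlockReaderFactory acts as the cache's ShortCircuitReplicaCreator, and its createShortCircuitReplicaInfo() ultimately invokes requestFileDescriptors(). The following is a minimal sketch of that caller relationship, using only the fetchOrCreate() and creator APIs that appear in these snippets; the trivial creator that returns null is a placeholder, not the actual factory logic.

// Hypothetical sketch: how a creator plugs into the ShortCircuitCache.
// fetchOrCreate() returns a cached replica when one exists; otherwise it
// invokes the supplied creator. In the real client, BlockReaderFactory is
// the creator and its createShortCircuitReplicaInfo() ends up calling
// requestFileDescriptors() shown above.
import org.apache.hadoop.hdfs.ExtendedBlockId;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplicaInfo;

class ReplicaLookupSketch {
  ShortCircuitReplicaInfo lookup(ShortCircuitCache cache, ExtendedBlock block) {
    ExtendedBlockId key =
        new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
    return cache.fetchOrCreate(key,
        new ShortCircuitCache.ShortCircuitReplicaCreator() {
          @Override
          public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
            // A real creator would open a DomainPeer to the DataNode and
            // call requestFileDescriptors(peer, slot); returning null tells
            // the cache that no replica could be created this time.
            return null;
          }
        });
  }
}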
Use of org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache in project hadoop by apache.
The class BlockReaderFactory, method getBlockReaderLocal.
private BlockReader getBlockReaderLocal() throws InvalidToken {
  LOG.trace("{}: trying to construct a BlockReaderLocal for short-circuit "
      + " reads.", this);
  if (pathInfo == null) {
    pathInfo = clientContext.getDomainSocketFactory()
        .getPathInfo(inetSocketAddress, conf.getShortCircuitConf());
  }
  if (!pathInfo.getPathState().getUsableForShortCircuit()) {
    PerformanceAdvisory.LOG.debug("{}: {} is not usable for short circuit; "
        + "giving up on BlockReaderLocal.", this, pathInfo);
    return null;
  }
  ShortCircuitCache cache = clientContext.getShortCircuitCache();
  ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(),
      block.getBlockPoolId());
  ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
  InvalidToken exc = info.getInvalidTokenException();
  if (exc != null) {
    LOG.trace("{}: got InvalidToken exception while trying to construct "
        + "BlockReaderLocal via {}", this, pathInfo.getPath());
    throw exc;
  }
  if (info.getReplica() == null) {
    PerformanceAdvisory.LOG.debug("{}: failed to get ShortCircuitReplica. "
        + "Cannot construct BlockReaderLocal via {}", this, pathInfo.getPath());
    return null;
  }
  return new BlockReaderLocal.Builder(conf.getShortCircuitConf())
      .setFilename(fileName)
      .setBlock(block)
      .setStartOffset(startOffset)
      .setShortCircuitReplica(info.getReplica())
      .setVerifyChecksum(verifyChecksum)
      .setCachingStrategy(cachingStrategy)
      .setStorageType(storageType)
      .setTracer(tracer)
      .build();
}
Use of org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache in project hadoop by apache.
The class TestEnhancedByteBufferAccess, method testZeroCopyMmapCache.
@Test
public void testZeroCopyMmapCache() throws Exception {
  HdfsConfiguration conf = initZeroCopyTest();
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final int TEST_FILE_LENGTH = 5 * BLOCK_SIZE;
  final int RANDOM_SEED = 23453;
  final String CONTEXT = "testZeroCopyMmapCacheContext";
  FSDataInputStream fsIn = null;
  ByteBuffer[] results = { null, null, null, null };
  DistributedFileSystem fs = null;
  conf.set(HdfsClientConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  fs = cluster.getFileSystem();
  DFSTestUtil.createFile(fs, TEST_PATH, TEST_FILE_LENGTH, (short) 1, RANDOM_SEED);
  try {
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short) 1);
  } catch (InterruptedException e) {
    Assert.fail("unexpected InterruptedException during waitReplication: " + e);
  } catch (TimeoutException e) {
    Assert.fail("unexpected TimeoutException during waitReplication: " + e);
  }
  fsIn = fs.open(TEST_PATH);
  byte[] original = new byte[TEST_FILE_LENGTH];
  IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
  fsIn.close();
  fsIn = fs.open(TEST_PATH);
  final ShortCircuitCache cache =
      ClientContext.get(CONTEXT, conf).getShortCircuitCache();
  cache.accept(new CountingVisitor(0, 5, 5, 0));
  results[0] = fsIn.read(null, BLOCK_SIZE, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
  fsIn.seek(0);
  results[1] = fsIn.read(null, BLOCK_SIZE, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
  // The mmap should be of the first block of the file.
  final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
  cache.accept(new CacheVisitor() {
    @Override
    public void visit(int numOutstandingMmaps,
        Map<ExtendedBlockId, ShortCircuitReplica> replicas,
        Map<ExtendedBlockId, InvalidToken> failedLoads,
        LinkedMap evictable, LinkedMap evictableMmapped) {
      ShortCircuitReplica replica = replicas.get(
          new ExtendedBlockId(firstBlock.getBlockId(), firstBlock.getBlockPoolId()));
      Assert.assertNotNull(replica);
      Assert.assertTrue(replica.hasMmap());
      // The replica should not yet be evictable, since we have it open.
      Assert.assertNull(replica.getEvictableTimeNs());
    }
  });
  // Read more blocks.
  results[2] = fsIn.read(null, BLOCK_SIZE, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
  results[3] = fsIn.read(null, BLOCK_SIZE, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
  // we should have 3 mmaps, 1 evictable
  cache.accept(new CountingVisitor(3, 5, 2, 0));
  // After we release the buffers and close the stream, the mmaps should
  // become evictable and then be closed (the cache uses a very quick timeout).
  for (ByteBuffer buffer : results) {
    if (buffer != null) {
      fsIn.releaseBuffer(buffer);
    }
  }
  fsIn.close();
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      final MutableBoolean finished = new MutableBoolean(false);
      cache.accept(new CacheVisitor() {
        @Override
        public void visit(int numOutstandingMmaps,
            Map<ExtendedBlockId, ShortCircuitReplica> replicas,
            Map<ExtendedBlockId, InvalidToken> failedLoads,
            LinkedMap evictable, LinkedMap evictableMmapped) {
          finished.setValue(evictableMmapped.isEmpty());
        }
      });
      return finished.booleanValue();
    }
  }, 10, 60000);
  cache.accept(new CountingVisitor(0, -1, -1, -1));
  fs.close();
  cluster.shutdown();
}
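The CountingVisitor used throughout this test is a private helper of TestEnhancedByteBufferAccess that is not included in the snippet. A rough stand-in, written against the CacheVisitor callback whose signature appears above, might look like the following; the constructor argument order (outstanding mmaps, replicas, evictable, mmapped-and-evictable) and the convention that -1 means "don't check this count" are assumptions inferred from calls such as new CountingVisitor(0, 5, 5, 0) and new CountingVisitor(0, -1, -1, -1), and the import locations are assumed to match those used by the test.

// Hypothetical stand-in for the test's CountingVisitor: asserts the expected
// cache-state counts, skipping any field passed as -1.
import java.util.Map;

import org.apache.commons.collections.map.LinkedMap;
import org.apache.hadoop.hdfs.ExtendedBlockId;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.junit.Assert;

class CountingVisitorSketch implements ShortCircuitCache.CacheVisitor {
  private final int expectedOutstandingMmaps;
  private final int expectedReplicas;
  private final int expectedEvictable;
  private final int expectedMmappedEvictable;

  CountingVisitorSketch(int expectedOutstandingMmaps, int expectedReplicas,
      int expectedEvictable, int expectedMmappedEvictable) {
    this.expectedOutstandingMmaps = expectedOutstandingMmaps;
    this.expectedReplicas = expectedReplicas;
    this.expectedEvictable = expectedEvictable;
    this.expectedMmappedEvictable = expectedMmappedEvictable;
  }

  @Override
  public void visit(int numOutstandingMmaps,
      Map<ExtendedBlockId, ShortCircuitReplica> replicas,
      Map<ExtendedBlockId, InvalidToken> failedLoads,
      LinkedMap evictable, LinkedMap evictableMmapped) {
    if (expectedOutstandingMmaps >= 0) {
      Assert.assertEquals(expectedOutstandingMmaps, numOutstandingMmaps);
    }
    if (expectedReplicas >= 0) {
      Assert.assertEquals(expectedReplicas, replicas.size());
    }
    if (expectedEvictable >= 0) {
      Assert.assertEquals(expectedEvictable, evictable.size());
    }
    if (expectedMmappedEvictable >= 0) {
      Assert.assertEquals(expectedMmappedEvictable, evictableMmapped.size());
    }
  }
}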
Use of org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache in project hadoop by apache.
The class TestEnhancedByteBufferAccess, method testZeroCopyReadOfCachedData.
/**
* Test that we can zero-copy read cached data even without disabling
* checksums.
*/
@Test(timeout = 120000)
public void testZeroCopyReadOfCachedData() throws Exception {
  BlockReaderTestUtil.enableShortCircuitShmTracing();
  BlockReaderTestUtil.enableBlockReaderFactoryTracing();
  BlockReaderTestUtil.enableHdfsCachingTracing();
  final int TEST_FILE_LENGTH = BLOCK_SIZE;
  final Path TEST_PATH = new Path("/a");
  final int RANDOM_SEED = 23453;
  HdfsConfiguration conf = initZeroCopyTest();
  conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.SKIP_CHECKSUM_KEY, false);
  final String CONTEXT = "testZeroCopyReadOfCachedData";
  conf.set(HdfsClientConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);
  conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
      DFSTestUtil.roundUpToMultiple(TEST_FILE_LENGTH,
          (int) NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize()));
  MiniDFSCluster cluster = null;
  ByteBuffer result = null, result2 = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  FsDatasetSpi<?> fsd = cluster.getDataNodes().get(0).getFSDataset();
  DistributedFileSystem fs = cluster.getFileSystem();
  DFSTestUtil.createFile(fs, TEST_PATH, TEST_FILE_LENGTH, (short) 1, RANDOM_SEED);
  DFSTestUtil.waitReplication(fs, TEST_PATH, (short) 1);
  byte[] original = DFSTestUtil.calculateFileContentsFromSeed(RANDOM_SEED, TEST_FILE_LENGTH);
  // Prior to caching, the file can't be read via zero-copy
  FSDataInputStream fsIn = fs.open(TEST_PATH);
  try {
    result = fsIn.read(null, TEST_FILE_LENGTH / 2, EnumSet.noneOf(ReadOption.class));
    Assert.fail("expected UnsupportedOperationException");
  } catch (UnsupportedOperationException e) {
    // expected
  }
  // Cache the file
  fs.addCachePool(new CachePoolInfo("pool1"));
  long directiveId = fs.addCacheDirective(new CacheDirectiveInfo.Builder()
      .setPath(TEST_PATH)
      .setReplication((short) 1)
      .setPool("pool1")
      .build());
  int numBlocks = (int) Math.ceil((double) TEST_FILE_LENGTH / BLOCK_SIZE);
  DFSTestUtil.verifyExpectedCacheUsage(
      DFSTestUtil.roundUpToMultiple(TEST_FILE_LENGTH, BLOCK_SIZE),
      numBlocks, cluster.getDataNodes().get(0).getFSDataset());
  try {
    result = fsIn.read(null, TEST_FILE_LENGTH, EnumSet.noneOf(ReadOption.class));
  } catch (UnsupportedOperationException e) {
    Assert.fail("expected to be able to read cached file via zero-copy");
  }
  Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, BLOCK_SIZE),
      byteBufferToArray(result));
  // Test that files opened after the cache operation has finished
  // still get the benefits of zero-copy (regression test for HDFS-6086)
  FSDataInputStream fsIn2 = fs.open(TEST_PATH);
  try {
    result2 = fsIn2.read(null, TEST_FILE_LENGTH, EnumSet.noneOf(ReadOption.class));
  } catch (UnsupportedOperationException e) {
    Assert.fail("expected to be able to read cached file via zero-copy");
  }
  Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, BLOCK_SIZE),
      byteBufferToArray(result2));
  fsIn2.releaseBuffer(result2);
  fsIn2.close();
  // check that the replica is anchored
  final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
  final ShortCircuitCache cache =
      ClientContext.get(CONTEXT, conf).getShortCircuitCache();
  waitForReplicaAnchorStatus(cache, firstBlock, true, true, 1);
  // Uncache the replica
  fs.removeCacheDirective(directiveId);
  waitForReplicaAnchorStatus(cache, firstBlock, false, true, 1);
  fsIn.releaseBuffer(result);
  waitForReplicaAnchorStatus(cache, firstBlock, false, false, 1);
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
  fsIn.close();
  fs.close();
  cluster.shutdown();
}
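The waitForReplicaAnchorStatus() helper called above is another private method of the test class that is not shown in this snippet. A plausible sketch, reusing the GenericTestUtils.waitFor + CacheVisitor polling pattern from testZeroCopyMmapCache above, is given below; it is assumed to live in the same test class (so the same imports apply), and ShortCircuitReplica#getSlot() together with the Slot#isAnchorable()/isAnchored() accessors are assumptions about the shared-memory slot API rather than a copy of the real helper.

// Hypothetical sketch of waitForReplicaAnchorStatus: poll the cache until
// the replica for the given block reports the expected anchor state.
// Slot#isAnchorable()/isAnchored() and ShortCircuitReplica#getSlot() are
// assumed accessors; the actual helper may differ in detail.
private void waitForReplicaAnchorStatus(final ShortCircuitCache cache,
    final ExtendedBlock block, final boolean expectedIsAnchorable,
    final boolean expectedIsAnchored, final int expectedOutstandingMmaps)
    throws Exception {
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      final MutableBoolean result = new MutableBoolean(false);
      cache.accept(new CacheVisitor() {
        @Override
        public void visit(int numOutstandingMmaps,
            Map<ExtendedBlockId, ShortCircuitReplica> replicas,
            Map<ExtendedBlockId, InvalidToken> failedLoads,
            LinkedMap evictable, LinkedMap evictableMmapped) {
          Assert.assertEquals(expectedOutstandingMmaps, numOutstandingMmaps);
          ShortCircuitReplica replica = replicas.get(
              new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()));
          Assert.assertNotNull(replica);
          Slot slot = replica.getSlot();
          if (slot.isAnchorable() == expectedIsAnchorable
              && slot.isAnchored() == expectedIsAnchored) {
            result.setValue(true);
          }
        }
      });
      return result.booleanValue();
    }
  }, 10, 60000);
}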
Use of org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache in project hadoop by apache.
The class TestBlockReaderFactory, method testPurgingClosedReplicas.
/**
* When an InterruptedException is sent to a thread calling
* FileChannel#read, the FileChannel is immediately closed and the
* thread gets an exception. This effectively means that we might have
* someone asynchronously calling close() on the file descriptors we use
* in BlockReaderLocal. So when unreferencing a ShortCircuitReplica in
* ShortCircuitCache#unref, we should check if the FileChannel objects
* are still open. If not, we should purge the replica to avoid giving
* it out to any future readers.
*
* This is a regression test for HDFS-6227: Short circuit read failed
* due to ClosedChannelException.
*
* Note that you may still get ClosedChannelException errors if two threads
* are reading from the same replica and an InterruptedException is delivered
* to one of them.
*/
@Test(timeout = 120000)
public void testPurgingClosedReplicas() throws Exception {
  BlockReaderTestUtil.enableBlockReaderFactoryTracing();
  final AtomicInteger replicasCreated = new AtomicInteger(0);
  final AtomicBoolean testFailed = new AtomicBoolean(false);
  DFSInputStream.tcpReadsDisabledForTesting = true;
  BlockReaderFactory.createShortCircuitReplicaInfoCallback =
      new ShortCircuitCache.ShortCircuitReplicaCreator() {
        @Override
        public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
          replicasCreated.incrementAndGet();
          return null;
        }
      };
  TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
  Configuration conf = createShortCircuitConf("testPurgingClosedReplicas", sockDir);
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  final DistributedFileSystem dfs = cluster.getFileSystem();
  final String TEST_FILE = "/test_file";
  final int TEST_FILE_LEN = 4095;
  final int SEED = 0xFADE0;
  final DistributedFileSystem fs =
      (DistributedFileSystem) FileSystem.get(cluster.getURI(0), conf);
  DFSTestUtil.createFile(fs, new Path(TEST_FILE), TEST_FILE_LEN, (short) 1, SEED);
  final Semaphore sem = new Semaphore(0);
  final List<LocatedBlock> locatedBlocks =
      cluster.getNameNode().getRpcServer()
          .getBlockLocations(TEST_FILE, 0, TEST_FILE_LEN).getLocatedBlocks();
  // first block
  final LocatedBlock lblock = locatedBlocks.get(0);
  final byte[] buf = new byte[TEST_FILE_LEN];
  Runnable readerRunnable = new Runnable() {
    @Override
    public void run() {
      try {
        while (true) {
          BlockReader blockReader = null;
          try {
            blockReader = BlockReaderTestUtil.getBlockReader(
                cluster.getFileSystem(), lblock, 0, TEST_FILE_LEN);
            sem.release();
            try {
              blockReader.readAll(buf, 0, TEST_FILE_LEN);
            } finally {
              sem.acquireUninterruptibly();
            }
          } catch (ClosedByInterruptException e) {
            LOG.info("got the expected ClosedByInterruptException", e);
            sem.release();
            break;
          } finally {
            if (blockReader != null)
              blockReader.close();
          }
          LOG.info("read another " + TEST_FILE_LEN + " bytes.");
        }
      } catch (Throwable t) {
        LOG.error("getBlockReader failure", t);
        testFailed.set(true);
        sem.release();
      }
    }
  };
  Thread thread = new Thread(readerRunnable);
  thread.start();
  // These should trigger a ClosedChannelException.
  while (thread.isAlive()) {
    sem.acquireUninterruptibly();
    thread.interrupt();
    sem.release();
  }
  Assert.assertFalse(testFailed.get());
  // We should be able to read from the file without
  // getting a ClosedChannelException.
  BlockReader blockReader = null;
  try {
    blockReader = BlockReaderTestUtil.getBlockReader(
        cluster.getFileSystem(), lblock, 0, TEST_FILE_LEN);
    blockReader.readFully(buf, 0, TEST_FILE_LEN);
  } finally {
    if (blockReader != null)
      blockReader.close();
  }
  byte[] expected = DFSTestUtil.calculateFileContentsFromSeed(SEED, TEST_FILE_LEN);
  Assert.assertTrue(Arrays.equals(buf, expected));
  // Another ShortCircuitReplica object should have been created.
  Assert.assertEquals(2, replicasCreated.get());
  dfs.close();
  cluster.shutdown();
  sockDir.close();
}
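The behavior this test exercises lives in ShortCircuitCache#unref, as described in the javadoc above: before a released replica goes back on the evictable lists, the cache checks whether its FileChannels are still open and purges it if they are not. The following is a simplified sketch of just that check; the getDataStream()/getMetaStream() accessors are assumptions about ShortCircuitReplica, and the real unref() additionally updates reference counts, the evictable lists, and performs the purge under the cache lock.

// Simplified sketch of the purge-on-close check described above (HDFS-6227).
// An interrupted read closes the underlying FileChannel, so a replica whose
// channels are no longer open must not be handed out to future readers.
import java.io.FileInputStream;

import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica;

class ClosedChannelCheckSketch {
  static boolean shouldPurge(ShortCircuitReplica replica) {
    // getDataStream()/getMetaStream() are assumed accessors here.
    FileInputStream data = replica.getDataStream();
    FileInputStream meta = replica.getMetaStream();
    // A closed channel means some thread was interrupted mid-read and the
    // JVM closed the descriptor; the replica can no longer be trusted.
    return !data.getChannel().isOpen() || !meta.getChannel().isOpen();
  }
}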