
Example 6 with DfsClientConf

Use of org.apache.hadoop.hdfs.client.impl.DfsClientConf in project hadoop by apache.

Class: DFSInputStream, method: hedgedFetchBlockByteRange.

/**
   * Like {@link #fetchBlockByteRange}, except we start up a second, parallel,
   * 'hedged' read if the first read is taking longer than the configured
   * amount of time. We then wait on whichever read returns first.
   */
private void hedgedFetchBlockByteRange(LocatedBlock block, long start, long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks) throws IOException {
    final DfsClientConf conf = dfsClient.getConf();
    ArrayList<Future<ByteBuffer>> futures = new ArrayList<>();
    CompletionService<ByteBuffer> hedgedService = new ExecutorCompletionService<>(dfsClient.getHedgedReadsThreadPool());
    ArrayList<DatanodeInfo> ignored = new ArrayList<>();
    ByteBuffer bb;
    int len = (int) (end - start + 1);
    int hedgedReadId = 0;
    block = refreshLocatedBlock(block);
    while (true) {
        // see HDFS-6591, this metric is used to verify/catch unnecessary loops
        hedgedReadOpsLoopNumForTesting++;
        DNAddrPair chosenNode = null;
        // No read request is executing yet; submit the first one.
        if (futures.isEmpty()) {
            // chooseDataNode is a commitment. If no node is available, we go to
            // the NN to re-fetch block locations. Only go here on the first read.
            chosenNode = chooseDataNode(block, ignored);
            bb = ByteBuffer.allocate(len);
            Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode(chosenNode, block, start, end, bb, corruptedBlocks, hedgedReadId++);
            Future<ByteBuffer> firstRequest = hedgedService.submit(getFromDataNodeCallable);
            futures.add(firstRequest);
            try {
                Future<ByteBuffer> future = hedgedService.poll(conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS);
                if (future != null) {
                    ByteBuffer result = future.get();
                    result.flip();
                    buf.put(result);
                    return;
                }
                DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged " + "read", conf.getHedgedReadThresholdMillis(), chosenNode.info);
                // Ignore this node on next go around.
                ignored.add(chosenNode.info);
                dfsClient.getHedgedReadMetrics().incHedgedReadOps();
            // continue; no need to refresh block locations
            } catch (ExecutionException e) {
            // Ignore
            } catch (InterruptedException e) {
                throw new InterruptedIOException("Interrupted while waiting for reading task");
            }
        } else {
            // If no nodes to do hedged reads against, pass.
            try {
                chosenNode = getBestNodeDNAddrPair(block, ignored);
                if (chosenNode == null) {
                    chosenNode = chooseDataNode(block, ignored);
                }
                bb = ByteBuffer.allocate(len);
                Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode(chosenNode, block, start, end, bb, corruptedBlocks, hedgedReadId++);
                Future<ByteBuffer> oneMoreRequest = hedgedService.submit(getFromDataNodeCallable);
                futures.add(oneMoreRequest);
            } catch (IOException ioe) {
                DFSClient.LOG.debug("Failed getting node for hedged read: {}", ioe.getMessage());
            }
            // Wait on the outstanding reads for a fixed interval and get the
            // result from the fastest one.
            try {
                ByteBuffer result = getFirstToComplete(hedgedService, futures);
                // cancel the rest.
                cancelAll(futures);
                dfsClient.getHedgedReadMetrics().incHedgedReadWins();
                result.flip();
                buf.put(result);
                return;
            } catch (InterruptedException ie) {
            // Ignore and retry
            }
            // We got here on an exception; ignore this node on the next
            // go-around if we found a chosenNode to hedge read against.
            if (chosenNode != null && chosenNode.info != null) {
                ignored.add(chosenNode.info);
            }
        }
    }
}
Also used : InterruptedIOException(java.io.InterruptedIOException) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException)
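
A minimal usage sketch, not part of the example above: hedged reads are driven by two standard HDFS client properties, dfs.client.hedged.read.threadpool.size and dfs.client.hedged.read.threshold.millis. The values and the file path below are illustrative; verify the keys against your Hadoop version's hdfs-default.xml.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HedgedReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A thread pool size > 0 enables hedged reads in the DFS client.
        conf.setInt("dfs.client.hedged.read.threadpool.size", 5);
        // Spawn a second, 'hedged' read once the first has been pending for 500 ms.
        conf.setLong("dfs.client.hedged.read.threshold.millis", 500);
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
             FSDataInputStream in = fs.open(new Path("/data/example.bin"))) {
            byte[] buf = new byte[4096];
            // Positional reads take the hedgedFetchBlockByteRange path when hedging is enabled.
            in.readFully(0, buf);
        }
    }
}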

Example 7 with DfsClientConf

Use of org.apache.hadoop.hdfs.client.impl.DfsClientConf in project hadoop by apache.

Class: DFSOutputStream, method: addBlock.

static LocatedBlock addBlock(DatanodeInfo[] excludedNodes, DFSClient dfsClient, String src, ExtendedBlock prevBlock, long fileId, String[] favoredNodes, EnumSet<AddBlockFlag> allocFlags) throws IOException {
    final DfsClientConf conf = dfsClient.getConf();
    int retries = conf.getNumBlockWriteLocateFollowingRetry();
    long sleeptime = conf.getBlockWriteLocateFollowingInitialDelayMs();
    long localstart = Time.monotonicNow();
    while (true) {
        try {
            return dfsClient.namenode.addBlock(src, dfsClient.clientName, prevBlock, excludedNodes, fileId, favoredNodes, allocFlags);
        } catch (RemoteException e) {
            IOException ue = e.unwrapRemoteException(FileNotFoundException.class, AccessControlException.class, NSQuotaExceededException.class, DSQuotaExceededException.class, QuotaByStorageTypeExceededException.class, UnresolvedPathException.class);
            if (ue != e) {
                // no need to retry these exceptions
                throw ue;
            }
            if (NotReplicatedYetException.class.getName().equals(e.getClassName())) {
                if (retries == 0) {
                    throw e;
                } else {
                    --retries;
                    LOG.info("Exception while adding a block", e);
                    long elapsed = Time.monotonicNow() - localstart;
                    if (elapsed > 5000) {
                        LOG.info("Waiting for replication for " + (elapsed / 1000) + " seconds");
                    }
                    try {
                        LOG.warn("NotReplicatedYetException sleeping " + src + " retries left " + retries);
                        Thread.sleep(sleeptime);
                        sleeptime *= 2;
                    } catch (InterruptedException ie) {
                        LOG.warn("Caught exception", ie);
                    }
                }
            } else {
                throw e;
            }
        }
    }
}
Also used : QuotaByStorageTypeExceededException(org.apache.hadoop.hdfs.protocol.QuotaByStorageTypeExceededException) DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) DSQuotaExceededException(org.apache.hadoop.hdfs.protocol.DSQuotaExceededException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(org.apache.hadoop.security.AccessControlException) SnapshotAccessControlException(org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException) NSQuotaExceededException(org.apache.hadoop.hdfs.protocol.NSQuotaExceededException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException) RemoteException(org.apache.hadoop.ipc.RemoteException) UnresolvedPathException(org.apache.hadoop.hdfs.protocol.UnresolvedPathException)
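
The retry logic above follows a standard exponential-backoff pattern: on NotReplicatedYetException it sleeps, doubles the delay, and decrements the retry budget. A generic, self-contained sketch of that pattern (the class and method names are illustrative, not part of the Hadoop API):

import java.util.concurrent.Callable;

public final class BackoffRetry {
    // Retry a call with exponentially growing sleeps, as addBlock does for
    // NotReplicatedYetException; retries and initialDelayMs play the roles of
    // conf.getNumBlockWriteLocateFollowingRetry() and
    // conf.getBlockWriteLocateFollowingInitialDelayMs().
    public static <T> T retryWithBackoff(Callable<T> call, int retries, long initialDelayMs)
            throws Exception {
        long delay = initialDelayMs;
        while (true) {
            try {
                return call.call();
            } catch (Exception e) {
                if (retries-- == 0) {
                    throw e;          // retry budget exhausted, surface the last failure
                }
                Thread.sleep(delay);  // back off before the next attempt
                delay *= 2;           // double the delay each round
            }
        }
    }
}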

Example 8 with DfsClientConf

Use of org.apache.hadoop.hdfs.client.impl.DfsClientConf in project hadoop by apache.

Class: DFSInputStream, method: openInfo.

/**
   * Grab the open-file info from the namenode.
   * @param refreshLocatedBlocks whether to re-fetch located blocks
   */
void openInfo(boolean refreshLocatedBlocks) throws IOException {
    final DfsClientConf conf = dfsClient.getConf();
    synchronized (infoLock) {
        lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength(refreshLocatedBlocks);
        int retriesForLastBlockLength = conf.getRetryTimesForGetLastBlockLength();
        while (retriesForLastBlockLength > 0) {
            // retry up to the configured number of times (default 3) to get the length.
            if (lastBlockBeingWrittenLength == -1) {
                DFSClient.LOG.warn("Last block locations not available. " + "Datanodes might not have reported blocks completely." + " Will retry for " + retriesForLastBlockLength + " times");
                waitFor(conf.getRetryIntervalForGetLastBlockLength());
                lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength(true);
            } else {
                break;
            }
            retriesForLastBlockLength--;
        }
        if (retriesForLastBlockLength == 0) {
            throw new IOException("Could not obtain the last block locations.");
        }
    }
}
Also used : DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)
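
The retry count and interval above come from two HDFS client properties. The keys in this sketch are, to the best of my knowledge, the standard ones backing getRetryTimesForGetLastBlockLength() and getRetryIntervalForGetLastBlockLength(), but verify them against your version's hdfs-default.xml before relying on them.

import org.apache.hadoop.conf.Configuration;

public class LastBlockLengthRetryConfig {
    public static Configuration configure() {
        Configuration conf = new Configuration();
        // Assumed standard keys; defaults are typically 3 retries and a 4000 ms interval.
        conf.setInt("dfs.client.retry.times.get-last-block-length", 5);
        conf.setInt("dfs.client.retry.interval-ms.get-last-block-length", 4000);
        return conf;
    }
}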

Example 9 with DfsClientConf

Use of org.apache.hadoop.hdfs.client.impl.DfsClientConf in project hadoop by apache.

Class: TestBlockTokenWithDFS, method: tryRead.

// try reading a block using a BlockReader directly
protected void tryRead(final Configuration conf, LocatedBlock lblock, boolean shouldSucceed) {
    InetSocketAddress targetAddr = null;
    IOException ioe = null;
    BlockReader blockReader = null;
    ExtendedBlock block = lblock.getBlock();
    try {
        DatanodeInfo[] nodes = lblock.getLocations();
        targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());
        blockReader = new BlockReaderFactory(new DfsClientConf(conf)).setFileName(BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId())).setBlock(block).setBlockToken(lblock.getBlockToken()).setInetSocketAddress(targetAddr).setStartOffset(0).setLength(0).setVerifyChecksum(true).setClientName("TestBlockTokenWithDFS").setDatanodeInfo(nodes[0]).setCachingStrategy(CachingStrategy.newDefaultStrategy()).setClientCacheContext(ClientContext.getFromConf(conf)).setConfiguration(conf).setTracer(FsTracer.get(conf)).setRemotePeerFactory(new RemotePeerFactory() {

            @Override
            public Peer newConnectedPeer(InetSocketAddress addr, Token<BlockTokenIdentifier> blockToken, DatanodeID datanodeId) throws IOException {
                Peer peer = null;
                Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
                try {
                    sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                    sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                    peer = DFSUtilClient.peerFromSocket(sock);
                } finally {
                    if (peer == null) {
                        IOUtils.closeSocket(sock);
                    }
                }
                return peer;
            }
        }).build();
    } catch (IOException ex) {
        ioe = ex;
    } finally {
        if (blockReader != null) {
            try {
                blockReader.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }
    if (shouldSucceed) {
        Assert.assertNotNull("OP_READ_BLOCK: access token is invalid, " + "when it is expected to be valid", blockReader);
    } else {
        Assert.assertNotNull("OP_READ_BLOCK: access token is valid, " + "when it is expected to be invalid", ioe);
        Assert.assertTrue("OP_READ_BLOCK failed due to reasons other than access token: ", ioe instanceof InvalidBlockTokenException);
    }
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) InetSocketAddress(java.net.InetSocketAddress) BlockReader(org.apache.hadoop.hdfs.BlockReader) Peer(org.apache.hadoop.hdfs.net.Peer) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) IOException(java.io.IOException) DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) BlockTokenIdentifier(org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier) InvalidBlockTokenException(org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException) BlockReaderFactory(org.apache.hadoop.hdfs.client.impl.BlockReaderFactory) RemotePeerFactory(org.apache.hadoop.hdfs.RemotePeerFactory) Socket(java.net.Socket)
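
Once build() succeeds, the returned BlockReader can be consumed directly. A minimal sketch, assuming the read(byte[], int, int) method of this Hadoop version's BlockReader interface; the helper class is illustrative and takes the reader built in tryRead above:

import java.io.IOException;
import org.apache.hadoop.hdfs.BlockReader;

public final class BlockReaderDrain {
    // Drain a successfully built BlockReader, e.g. the one returned by
    // BlockReaderFactory.build() in tryRead above; returns the number of
    // block payload bytes read.
    static long drain(BlockReader blockReader) throws IOException {
        byte[] buf = new byte[64 * 1024];
        long total = 0;
        int n;
        while ((n = blockReader.read(buf, 0, buf.length)) > 0) {
            total += n;  // checksums are verified by the reader (setVerifyChecksum(true))
        }
        return total;
    }
}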

Example 10 with DfsClientConf

Use of org.apache.hadoop.hdfs.client.impl.DfsClientConf in project hadoop by apache.

Class: TestDataNodeVolumeFailure, method: accessBlock.

/**
   * Try to access a block on a datanode; throws an exception on failure.
   * @param datanode the datanode hosting the block
   * @param lblock the located block to access
   * @throws IOException if the block cannot be read
   */
private void accessBlock(DatanodeInfo datanode, LocatedBlock lblock) throws IOException {
    InetSocketAddress targetAddr = null;
    ExtendedBlock block = lblock.getBlock();
    targetAddr = NetUtils.createSocketAddr(datanode.getXferAddr());
    BlockReader blockReader = new BlockReaderFactory(new DfsClientConf(conf)).setInetSocketAddress(targetAddr).setBlock(block).setFileName(BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId())).setBlockToken(lblock.getBlockToken()).setStartOffset(0).setLength(0).setVerifyChecksum(true).setClientName("TestDataNodeVolumeFailure").setDatanodeInfo(datanode).setCachingStrategy(CachingStrategy.newDefaultStrategy()).setClientCacheContext(ClientContext.getFromConf(conf)).setConfiguration(conf).setTracer(FsTracer.get(conf)).setRemotePeerFactory(new RemotePeerFactory() {

        @Override
        public Peer newConnectedPeer(InetSocketAddress addr, Token<BlockTokenIdentifier> blockToken, DatanodeID datanodeId) throws IOException {
            Peer peer = null;
            Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
            try {
                sock.connect(addr, HdfsConstants.READ_TIMEOUT);
                sock.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                peer = DFSUtilClient.peerFromSocket(sock);
            } finally {
                if (peer == null) {
                    IOUtils.closeSocket(sock);
                }
            }
            return peer;
        }
    }).build();
    blockReader.close();
}
Also used : DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) InetSocketAddress(java.net.InetSocketAddress) BlockReader(org.apache.hadoop.hdfs.BlockReader) Peer(org.apache.hadoop.hdfs.net.Peer) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) BlockReaderFactory(org.apache.hadoop.hdfs.client.impl.BlockReaderFactory) Token(org.apache.hadoop.security.token.Token) RemotePeerFactory(org.apache.hadoop.hdfs.RemotePeerFactory) Socket(java.net.Socket)
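
For reference, a minimal sketch that constructs a DfsClientConf and reads back the accessors exercised across these examples; it only queries values, so it should be safe to run against any Configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.client.impl.DfsClientConf;

public class DfsClientConfProbe {
    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        DfsClientConf clientConf = new DfsClientConf(configuration);
        // Accessors below are the ones used by the examples on this page.
        System.out.println("hedged read threshold (ms): "
            + clientConf.getHedgedReadThresholdMillis());
        System.out.println("block-write locate-following retries: "
            + clientConf.getNumBlockWriteLocateFollowingRetry());
        System.out.println("get-last-block-length retries: "
            + clientConf.getRetryTimesForGetLastBlockLength());
    }
}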

Aggregations

DfsClientConf (org.apache.hadoop.hdfs.client.impl.DfsClientConf): 10 usages
IOException (java.io.IOException): 7 usages
InterruptedIOException (java.io.InterruptedIOException): 5 usages
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 4 usages
InetSocketAddress (java.net.InetSocketAddress): 3 usages
Socket (java.net.Socket): 3 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 3 usages
ArrayList (java.util.ArrayList): 2 usages
LinkedList (java.util.LinkedList): 2 usages
BlockReader (org.apache.hadoop.hdfs.BlockReader): 2 usages
RemotePeerFactory (org.apache.hadoop.hdfs.RemotePeerFactory): 2 usages
BlockReaderFactory (org.apache.hadoop.hdfs.client.impl.BlockReaderFactory): 2 usages
Peer (org.apache.hadoop.hdfs.net.Peer): 2 usages
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 2 usages
MultipleIOException (org.apache.hadoop.io.MultipleIOException): 2 usages
RemoteException (org.apache.hadoop.ipc.RemoteException): 2 usages
DataOutputStream (java.io.DataOutputStream): 1 usage
FileNotFoundException (java.io.FileNotFoundException): 1 usage
ByteBuffer (java.nio.ByteBuffer): 1 usage
ExecutionException (java.util.concurrent.ExecutionException): 1 usage