
Example 51 with DatanodeInfo

Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

The class DFSInputStream, method chooseDataNode.

private DNAddrPair chooseDataNode(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) throws IOException {
    while (true) {
        DNAddrPair result = getBestNodeDNAddrPair(block, ignoredNodes);
        if (result != null) {
            return result;
        } else {
            String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), deadNodes, ignoredNodes);
            String blockInfo = block.getBlock() + " file=" + src;
            if (failures >= dfsClient.getConf().getMaxBlockAcquireFailures()) {
                String description = "Could not obtain block: " + blockInfo;
                DFSClient.LOG.warn(description + errMsg + ". Throwing a BlockMissingException");
                throw new BlockMissingException(src, description, block.getStartOffset());
            }
            DatanodeInfo[] nodes = block.getLocations();
            if (nodes == null || nodes.length == 0) {
                DFSClient.LOG.info("No node available for " + blockInfo);
            }
            DFSClient.LOG.info("Could not obtain " + block.getBlock() + " from any node: " + errMsg + ". Will get new block locations from namenode and retry...");
            try {
                // Introducing a random factor to the wait time before another retry.
                // The wait time is dependent on # of failures and a random factor.
                // At the first time of getting a BlockMissingException, the wait time
                // is a random number between 0..3000 ms. If the first retry
                // still fails, we will wait 3000 ms grace period before the 2nd retry.
                // Also at the second retry, the waiting window is expanded to 6000 ms
                // alleviating the request rate from the server. Similarly the 3rd retry
                // will wait 6000ms grace period before retry and the waiting window is
                // expanded to 9000ms.
                final int timeWindow = dfsClient.getConf().getTimeWindow();
                double waitTime =
                        // grace period for the last round of attempt
                        timeWindow * failures +
                        // expanding time window for each failure
                        timeWindow * (failures + 1) * ThreadLocalRandom.current().nextDouble();
                DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec.");
                Thread.sleep((long) waitTime);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException("Interrupted while choosing DataNode for read.");
            }
            // 2nd option is to remove only nodes[blockId]
            deadNodes.clear();
            openInfo(true);
            block = refreshLocatedBlock(block);
            failures++;
        }
    }
}
Also used: InterruptedIOException (java.io.InterruptedIOException), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)
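
The back-off above can be read as a grace period plus a randomized, expanding window. Below is a minimal standalone sketch of just that calculation; the class and method names (BackoffSketch, computeWaitTime) are illustrative and not part of DFSInputStream, and the 3000 ms value is the time window described in the comments above.

import java.util.concurrent.ThreadLocalRandom;

public class BackoffSketch {

    /**
     * Mirrors the wait-time formula in chooseDataNode: a grace period of
     * (timeWindow * failures) plus a random component drawn from an expanding
     * window of (timeWindow * (failures + 1)).
     */
    static double computeWaitTime(int timeWindow, int failures) {
        return timeWindow * failures
                + timeWindow * (failures + 1) * ThreadLocalRandom.current().nextDouble();
    }

    public static void main(String[] args) {
        // With a 3000 ms time window: the 1st retry waits 0..3000 ms, the 2nd
        // 3000..9000 ms, the 3rd 6000..15000 ms, and so on.
        for (int failures = 0; failures < 3; failures++) {
            System.out.printf("attempt #%d -> wait %.0f ms%n",
                    failures + 1, computeWaitTime(3000, failures));
        }
    }
}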

Example 52 with DatanodeInfo

Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

The class DFSInputStream, method getBestNodeDNAddrPair.

/**
   * Get the best node from which to stream the data.
   * @param block LocatedBlock, containing nodes in priority order.
   * @param ignoredNodes Do not choose nodes in this collection (may be null)
   * @return The DNAddrPair of the best node. Null if no node can be chosen.
   */
protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) {
    DatanodeInfo[] nodes = block.getLocations();
    StorageType[] storageTypes = block.getStorageTypes();
    DatanodeInfo chosenNode = null;
    StorageType storageType = null;
    if (nodes != null) {
        for (int i = 0; i < nodes.length; i++) {
            if (!deadNodes.containsKey(nodes[i]) && (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) {
                chosenNode = nodes[i];
                // index to get storage type.
                if (storageTypes != null && i < storageTypes.length) {
                    storageType = storageTypes[i];
                }
                break;
            }
        }
    }
    if (chosenNode == null) {
        reportLostBlock(block, ignoredNodes);
        return null;
    }
    final String dnAddr = chosenNode.getXferAddr(dfsClient.getConf().isConnectToDnViaHostname());
    DFSClient.LOG.debug("Connecting to datanode {}", dnAddr);
    InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);
    return new DNAddrPair(chosenNode, targetAddr, storageType);
}
Also used: DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), StorageType (org.apache.hadoop.fs.StorageType), InetSocketAddress (java.net.InetSocketAddress)
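
Stripped of HDFS types, the selection policy is: take the first location that is neither known-dead nor explicitly ignored. A minimal sketch of that policy using plain strings for node addresses (pickFirstLiveNode and the addresses are illustrative, not HDFS client API):

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class NodeSelectionSketch {

    /** Returns the first node that is neither dead nor ignored, or null if none qualifies. */
    static String pickFirstLiveNode(List<String> locations, Set<String> deadNodes,
            Collection<String> ignoredNodes) {
        for (String node : locations) {
            boolean dead = deadNodes.contains(node);
            boolean ignored = ignoredNodes != null && ignoredNodes.contains(node);
            if (!dead && !ignored) {
                return node;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        // Locations are already in priority order; dn1 is dead, so dn2 is chosen.
        List<String> locations = Arrays.asList("dn1:9866", "dn2:9866", "dn3:9866");
        Set<String> dead = new HashSet<>(Collections.singletonList("dn1:9866"));
        System.out.println(pickFirstLiveNode(locations, dead, Collections.emptyList()));
    }
}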

Example 53 with DatanodeInfo

Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

The class DFSInputStream, method readBlockLength.

/** Read the block length from one of the datanodes. */
private long readBlockLength(LocatedBlock locatedblock) throws IOException {
    assert locatedblock != null : "LocatedBlock cannot be null";
    int replicaNotFoundCount = locatedblock.getLocations().length;
    final DfsClientConf conf = dfsClient.getConf();
    final int timeout = conf.getSocketTimeout();
    LinkedList<DatanodeInfo> nodeList = new LinkedList<DatanodeInfo>(Arrays.asList(locatedblock.getLocations()));
    LinkedList<DatanodeInfo> retryList = new LinkedList<DatanodeInfo>();
    boolean isRetry = false;
    StopWatch sw = new StopWatch();
    while (nodeList.size() > 0) {
        DatanodeInfo datanode = nodeList.pop();
        ClientDatanodeProtocol cdp = null;
        try {
            cdp = DFSUtilClient.createClientDatanodeProtocolProxy(datanode, dfsClient.getConfiguration(), timeout, conf.isConnectToDnViaHostname(), locatedblock);
            final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock());
            if (n >= 0) {
                return n;
            }
        } catch (IOException ioe) {
            checkInterrupted(ioe);
            if (ioe instanceof RemoteException) {
                if (((RemoteException) ioe).unwrapRemoteException() instanceof ReplicaNotFoundException) {
                    // replica is not on the DN. We will treat it as 0 length
                    // if no one actually has a replica.
                    replicaNotFoundCount--;
                } else if (((RemoteException) ioe).unwrapRemoteException() instanceof RetriableException) {
                    // add to the list to be retried if necessary.
                    retryList.add(datanode);
                }
            }
            DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode {}" + " for block {}", datanode, locatedblock.getBlock(), ioe);
        } finally {
            if (cdp != null) {
                RPC.stopProxy(cdp);
            }
        }
        // Ran out of nodes, but there are retriable nodes.
        if (nodeList.size() == 0 && retryList.size() > 0) {
            nodeList.addAll(retryList);
            retryList.clear();
            isRetry = true;
        }
        if (isRetry) {
            // start the stop watch if not already running.
            if (!sw.isRunning()) {
                sw.start();
            }
            try {
                // delay between retries.
                Thread.sleep(500);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException("Interrupted while getting the length.");
            }
        }
        // see if we ran out of retry time
        if (sw.isRunning() && sw.now(TimeUnit.MILLISECONDS) > timeout) {
            break;
        }
    }
    // If every datanode reported ReplicaNotFoundException, no node has the
    // replica and the block is treated as zero-length. Otherwise some other
    // error occurred on a node that may have the replica, so report it.
    if (replicaNotFoundCount == 0) {
        return 0;
    }
    throw new IOException("Cannot obtain block length for " + locatedblock);
}
Also used: InterruptedIOException (java.io.InterruptedIOException), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), ReplicaNotFoundException (org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException), IOException (java.io.IOException), ClientDatanodeProtocol (org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol), LinkedList (java.util.LinkedList), StopWatch (org.apache.hadoop.util.StopWatch), DfsClientConf (org.apache.hadoop.hdfs.client.impl.DfsClientConf), RemoteException (org.apache.hadoop.ipc.RemoteException), RetriableException (org.apache.hadoop.ipc.RetriableException)
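
The control flow of readBlockLength is a queue-draining loop with a second pass for retriable failures and a deadline that starts ticking only once retries begin. A simplified sketch of the same pattern, with a hypothetical probe predicate standing in for the getReplicaVisibleLength RPC:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.function.Predicate;

public class RetryQueueSketch {

    /**
     * Probes each node once; failed nodes are queued for another pass and
     * re-probed until the timeout expires. The clock starts only when the first
     * retry pass begins. Returns the first node whose probe succeeds, or null.
     */
    static <T> T probeWithRetries(List<T> nodes, Predicate<T> probe, long timeoutMillis)
            throws InterruptedException {
        Deque<T> queue = new ArrayDeque<>(nodes);
        List<T> retryList = new ArrayList<>();
        long retryStart = -1;
        while (!queue.isEmpty()) {
            T node = queue.pop();
            if (probe.test(node)) {
                return node;
            }
            // In this sketch every failure is treated as retriable.
            retryList.add(node);
            // Ran out of nodes, but there are nodes to retry.
            if (queue.isEmpty() && !retryList.isEmpty()) {
                queue.addAll(retryList);
                retryList.clear();
                if (retryStart < 0) {
                    retryStart = System.currentTimeMillis();
                }
            }
            if (retryStart >= 0) {
                Thread.sleep(500); // fixed delay between retries
                if (System.currentTimeMillis() - retryStart > timeoutMillis) {
                    break; // ran out of retry time
                }
            }
        }
        return null;
    }
}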

Example 54 with DatanodeInfo

Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

The class DataStreamer, method handleBadDatanode.

/**
   * Remove bad node from list of nodes if badNodeIndex was set.
   * @return true if it should continue.
   */
boolean handleBadDatanode() {
    final int badNodeIndex = errorState.getBadNodeIndex();
    if (badNodeIndex >= 0) {
        if (nodes.length <= 1) {
            lastException.set(new IOException("All datanodes " + Arrays.toString(nodes) + " are bad. Aborting..."));
            streamerClosed = true;
            return false;
        }
        LOG.warn("Error Recovery for " + block + " in pipeline " + Arrays.toString(nodes) + ": datanode " + badNodeIndex + "(" + nodes[badNodeIndex] + ") is bad.");
        failed.add(nodes[badNodeIndex]);
        DatanodeInfo[] newnodes = new DatanodeInfo[nodes.length - 1];
        arraycopy(nodes, newnodes, badNodeIndex);
        final StorageType[] newStorageTypes = new StorageType[newnodes.length];
        arraycopy(storageTypes, newStorageTypes, badNodeIndex);
        final String[] newStorageIDs = new String[newnodes.length];
        arraycopy(storageIDs, newStorageIDs, badNodeIndex);
        setPipeline(newnodes, newStorageTypes, newStorageIDs);
        errorState.adjustState4RestartingNode();
        lastException.clear();
    }
    return true;
}
Also used: DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), StorageType (org.apache.hadoop.fs.StorageType), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), MultipleIOException (org.apache.hadoop.io.MultipleIOException)
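
The arraycopy calls above use a private DataStreamer helper that copies a source array into a destination one element shorter, skipping the bad index; the same shrink is applied to the nodes, storage types, and storage IDs so the three arrays stay parallel. A standalone sketch of that operation under the hypothetical name copyWithout:

import java.lang.reflect.Array;
import java.util.Arrays;

public class PipelineShrinkSketch {

    /** Copies src into a new array of length src.length - 1, skipping skipIndex. */
    static <T> T[] copyWithout(T[] src, int skipIndex) {
        @SuppressWarnings("unchecked")
        T[] dst = (T[]) Array.newInstance(src.getClass().getComponentType(), src.length - 1);
        System.arraycopy(src, 0, dst, 0, skipIndex);
        System.arraycopy(src, skipIndex + 1, dst, skipIndex, src.length - skipIndex - 1);
        return dst;
    }

    public static void main(String[] args) {
        // Dropping the bad node at index 1 leaves [dn1, dn3].
        String[] pipeline = {"dn1", "dn2", "dn3"};
        System.out.println(Arrays.toString(copyWithout(pipeline, 1)));
    }
}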

Example 55 with DatanodeInfo

Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.

The class DFSInputStream, method seekToNewSource.

/**
   * Seek to given position on a node other than the current node.  If
   * a node other than the current node is found, then returns true.
   * If another node could not be found, then returns false.
   */
@Override
public synchronized boolean seekToNewSource(long targetPos) throws IOException {
    if (currentNode == null) {
        return seekToBlockSource(targetPos);
    }
    boolean markedDead = deadNodes.containsKey(currentNode);
    addToDeadNodes(currentNode);
    DatanodeInfo oldNode = currentNode;
    DatanodeInfo newNode = blockSeekTo(targetPos);
    if (!markedDead) {
        /* Remove it from deadNodes. blockSeekTo could have cleared
         * deadNodes and added currentNode again. That's ok. */
        deadNodes.remove(oldNode);
    }
    if (!oldNode.getDatanodeUuid().equals(newNode.getDatanodeUuid())) {
        currentNode = newNode;
        return true;
    } else {
        return false;
    }
}
Also used: DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)
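
From a caller's point of view, seekToNewSource is useful when a positioned read from the current replica fails: if it returns true, another datanode serves the same offset and the read can be retried. A hedged usage sketch; the path, buffer size, and single-retry policy are illustrative choices, not prescribed by the API:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekToNewSourceSketch {

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.txt"))) {
            long pos = 0;
            byte[] buf = new byte[4096];
            try {
                in.readFully(pos, buf);
            } catch (IOException e) {
                // Try to switch to a different replica for the same offset.
                if (in.seekToNewSource(pos)) {
                    in.readFully(pos, buf); // retry once from the new datanode
                } else {
                    throw e; // no other replica available
                }
            }
        }
    }
}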

Aggregations

DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 214 usages
Test (org.junit.Test): 103 usages
Path (org.apache.hadoop.fs.Path): 91 usages
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 73 usages
IOException (java.io.IOException): 47 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 44 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 43 usages
ArrayList (java.util.ArrayList): 39 usages
Configuration (org.apache.hadoop.conf.Configuration): 38 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 37 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 32 usages
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 32 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 29 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 27 usages
FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 25 usages
InetSocketAddress (java.net.InetSocketAddress): 20 usages
LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock): 20 usages
StorageType (org.apache.hadoop.fs.StorageType): 18 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 14 usages
DatanodeInfoBuilder (org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder): 14 usages