use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
the class DFSInputStream method chooseDataNode.
private DNAddrPair chooseDataNode(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) throws IOException {
while (true) {
DNAddrPair result = getBestNodeDNAddrPair(block, ignoredNodes);
if (result != null) {
return result;
} else {
String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), deadNodes, ignoredNodes);
String blockInfo = block.getBlock() + " file=" + src;
if (failures >= dfsClient.getConf().getMaxBlockAcquireFailures()) {
String description = "Could not obtain block: " + blockInfo;
DFSClient.LOG.warn(description + errMsg + ". Throwing a BlockMissingException");
throw new BlockMissingException(src, description, block.getStartOffset());
}
DatanodeInfo[] nodes = block.getLocations();
if (nodes == null || nodes.length == 0) {
DFSClient.LOG.info("No node available for " + blockInfo);
}
DFSClient.LOG.info("Could not obtain " + block.getBlock() + " from any node: " + errMsg + ". Will get new block locations from namenode and retry...");
try {
// Introducing a random factor to the wait time before another retry.
// The wait time is dependent on # of failures and a random factor.
// At the first time of getting a BlockMissingException, the wait time
// is a random number between 0..3000 ms. If the first retry
// still fails, we will wait 3000 ms grace period before the 2nd retry.
// Also at the second retry, the waiting window is expanded to 6000 ms
// alleviating the request rate from the server. Similarly the 3rd retry
// will wait 6000ms grace period before retry and the waiting window is
// expanded to 9000ms.
final int timeWindow = dfsClient.getConf().getTimeWindow();
double waitTime = // grace period for the last round of attempt
timeWindow * failures + // expanding time window for each failure
timeWindow * (failures + 1) * ThreadLocalRandom.current().nextDouble();
DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec.");
Thread.sleep((long) waitTime);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new InterruptedIOException("Interrupted while choosing DataNode for read.");
}
//2nd option is to remove only nodes[blockId]
deadNodes.clear();
openInfo(true);
block = refreshLocatedBlock(block);
failures++;
}
}
}
use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
the class DFSInputStream method getBestNodeDNAddrPair.
/**
* Get the best node from which to stream the data.
* @param block LocatedBlock, containing nodes in priority order.
* @param ignoredNodes Do not choose nodes in this array (may be null)
* @return The DNAddrPair of the best node. Null if no node can be chosen.
*/
protected DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) {
DatanodeInfo[] nodes = block.getLocations();
StorageType[] storageTypes = block.getStorageTypes();
DatanodeInfo chosenNode = null;
StorageType storageType = null;
if (nodes != null) {
for (int i = 0; i < nodes.length; i++) {
if (!deadNodes.containsKey(nodes[i]) && (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) {
chosenNode = nodes[i];
// index to get storage type.
if (storageTypes != null && i < storageTypes.length) {
storageType = storageTypes[i];
}
break;
}
}
}
if (chosenNode == null) {
reportLostBlock(block, ignoredNodes);
return null;
}
final String dnAddr = chosenNode.getXferAddr(dfsClient.getConf().isConnectToDnViaHostname());
DFSClient.LOG.debug("Connecting to datanode {}", dnAddr);
InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);
return new DNAddrPair(chosenNode, targetAddr, storageType);
}
use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
the class DFSInputStream method readBlockLength.
/** Read the block length from one of the datanodes. */
private long readBlockLength(LocatedBlock locatedblock) throws IOException {
assert locatedblock != null : "LocatedBlock cannot be null";
int replicaNotFoundCount = locatedblock.getLocations().length;
final DfsClientConf conf = dfsClient.getConf();
final int timeout = conf.getSocketTimeout();
LinkedList<DatanodeInfo> nodeList = new LinkedList<DatanodeInfo>(Arrays.asList(locatedblock.getLocations()));
LinkedList<DatanodeInfo> retryList = new LinkedList<DatanodeInfo>();
boolean isRetry = false;
StopWatch sw = new StopWatch();
while (nodeList.size() > 0) {
DatanodeInfo datanode = nodeList.pop();
ClientDatanodeProtocol cdp = null;
try {
cdp = DFSUtilClient.createClientDatanodeProtocolProxy(datanode, dfsClient.getConfiguration(), timeout, conf.isConnectToDnViaHostname(), locatedblock);
final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock());
if (n >= 0) {
return n;
}
} catch (IOException ioe) {
checkInterrupted(ioe);
if (ioe instanceof RemoteException) {
if (((RemoteException) ioe).unwrapRemoteException() instanceof ReplicaNotFoundException) {
// replica is not on the DN. We will treat it as 0 length
// if no one actually has a replica.
replicaNotFoundCount--;
} else if (((RemoteException) ioe).unwrapRemoteException() instanceof RetriableException) {
// add to the list to be retried if necessary.
retryList.add(datanode);
}
}
DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode {}" + " for block {}", datanode, locatedblock.getBlock(), ioe);
} finally {
if (cdp != null) {
RPC.stopProxy(cdp);
}
}
// Ran out of nodes, but there are retriable nodes.
if (nodeList.size() == 0 && retryList.size() > 0) {
nodeList.addAll(retryList);
retryList.clear();
isRetry = true;
}
if (isRetry) {
// start the stop watch if not already running.
if (!sw.isRunning()) {
sw.start();
}
try {
// delay between retries.
Thread.sleep(500);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new InterruptedIOException("Interrupted while getting the length.");
}
}
// see if we ran out of retry time
if (sw.isRunning() && sw.now(TimeUnit.MILLISECONDS) > timeout) {
break;
}
}
// on a DN that has it. we want to report that error
if (replicaNotFoundCount == 0) {
return 0;
}
throw new IOException("Cannot obtain block length for " + locatedblock);
}
use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
the class DataStreamer method handleBadDatanode.
/**
* Remove bad node from list of nodes if badNodeIndex was set.
* @return true if it should continue.
*/
boolean handleBadDatanode() {
final int badNodeIndex = errorState.getBadNodeIndex();
if (badNodeIndex >= 0) {
if (nodes.length <= 1) {
lastException.set(new IOException("All datanodes " + Arrays.toString(nodes) + " are bad. Aborting..."));
streamerClosed = true;
return false;
}
LOG.warn("Error Recovery for " + block + " in pipeline " + Arrays.toString(nodes) + ": datanode " + badNodeIndex + "(" + nodes[badNodeIndex] + ") is bad.");
failed.add(nodes[badNodeIndex]);
DatanodeInfo[] newnodes = new DatanodeInfo[nodes.length - 1];
arraycopy(nodes, newnodes, badNodeIndex);
final StorageType[] newStorageTypes = new StorageType[newnodes.length];
arraycopy(storageTypes, newStorageTypes, badNodeIndex);
final String[] newStorageIDs = new String[newnodes.length];
arraycopy(storageIDs, newStorageIDs, badNodeIndex);
setPipeline(newnodes, newStorageTypes, newStorageIDs);
errorState.adjustState4RestartingNode();
lastException.clear();
}
return true;
}
use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
the class DFSInputStream method seekToNewSource.
/**
* Seek to given position on a node other than the current node. If
* a node other than the current node is found, then returns true.
* If another node could not be found, then returns false.
*/
@Override
public synchronized boolean seekToNewSource(long targetPos) throws IOException {
if (currentNode == null) {
return seekToBlockSource(targetPos);
}
boolean markedDead = deadNodes.containsKey(currentNode);
addToDeadNodes(currentNode);
DatanodeInfo oldNode = currentNode;
DatanodeInfo newNode = blockSeekTo(targetPos);
if (!markedDead) {
/* remove it from deadNodes. blockSeekTo could have cleared
* deadNodes and added currentNode again. Thats ok. */
deadNodes.remove(oldNode);
}
if (!oldNode.getDatanodeUuid().equals(newNode.getDatanodeUuid())) {
currentNode = newNode;
return true;
} else {
return false;
}
}
Aggregations