Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class DFSInputStream, method reportCheckSumFailure.
/**
 * DFSInputStream reports checksum failure.
 * For replicated blocks, the logic is as follows:
 * Case I : the client has tried multiple data nodes and at least one of the
 * attempts has succeeded. We report the other failures as corrupted blocks
 * to the namenode.
 * Case II: the client has tried all data nodes and every attempt failed. We
 * only report if the total number of replicas is 1. We do not report
 * otherwise, since the failure may be due to the client itself being unable
 * to read.
 *
 * For erasure-coded blocks, each block in corruptedBlockMap is an internal
 * block in a block group, and there is usually only one DataNode
 * corresponding to each internal block. In that case we simply report the
 * corrupted blocks to the NameNode and skip the logic above.
 *
 * @param corruptedBlocks map of corrupted blocks
 * @param dataNodeCount   number of data nodes that hold replicas of the block
 * @param isStriped       whether the block is erasure-coded (striped)
 */
protected void reportCheckSumFailure(CorruptedBlocks corruptedBlocks,
    int dataNodeCount, boolean isStriped) {
  Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap =
      corruptedBlocks.getCorruptionMap();
  if (corruptedBlockMap.isEmpty()) {
    return;
  }
  List<LocatedBlock> reportList = new ArrayList<>(corruptedBlockMap.size());
  for (Map.Entry<ExtendedBlock, Set<DatanodeInfo>> entry :
      corruptedBlockMap.entrySet()) {
    ExtendedBlock blk = entry.getKey();
    Set<DatanodeInfo> dnSet = entry.getValue();
    if (isStriped
        || ((dnSet.size() < dataNodeCount) && (dnSet.size() > 0))
        || ((dataNodeCount == 1) && (dnSet.size() == dataNodeCount))) {
      DatanodeInfo[] locs = new DatanodeInfo[dnSet.size()];
      int i = 0;
      for (DatanodeInfo dn : dnSet) {
        locs[i++] = dn;
      }
      reportList.add(new LocatedBlock(blk, locs));
    }
  }
  if (reportList.size() > 0) {
    dfsClient.reportChecksumFailure(src,
        reportList.toArray(new LocatedBlock[reportList.size()]));
  }
  corruptedBlockMap.clear();
}
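The condition inside the loop encodes the three cases described in the Javadoc. As a standalone illustration, the predicate can be pulled out and exercised with sample counts; ChecksumReportPredicate and shouldReport below are hypothetical names for this sketch, not part of DFSInputStream:

// A minimal standalone sketch that reproduces the reporting predicate
// from reportCheckSumFailure above. All names here are made up.
public class ChecksumReportPredicate {
  static boolean shouldReport(boolean isStriped, int failedNodes, int dataNodeCount) {
    return isStriped
        || (failedNodes > 0 && failedNodes < dataNodeCount)
        || (dataNodeCount == 1 && failedNodes == dataNodeCount);
  }

  public static void main(String[] args) {
    System.out.println(shouldReport(false, 1, 3)); // true:  Case I, one of three replicas failed
    System.out.println(shouldReport(false, 3, 3)); // false: Case II, all of several replicas failed
    System.out.println(shouldReport(false, 1, 1)); // true:  Case II, the single replica failed
    System.out.println(shouldReport(true, 4, 9));  // true:  striped (erasure-coded) block, always reported
  }
}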
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class DataStreamer, method updatePipeline.
/** Update the pipeline at the namenode. */
private void updatePipeline(long newGS) throws IOException {
  final ExtendedBlock oldBlock = block.getCurrentBlock();
  // The new GS has been propagated to all DNs, so it should be safe to
  // update the local block state.
  updateBlockGS(newGS);
  dfsClient.namenode.updatePipeline(dfsClient.clientName, oldBlock,
      block.getCurrentBlock(), nodes, storageIDs);
}
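As the comment notes, updateBlockGS only touches the client's local view of the block; the namenode RPC on the last line is what commits the new pipeline cluster-wide. As a rough illustration of the local half, here is a minimal sketch (the pool id, block id, length and generation stamps are made-up values, and bumpGenerationStamp is not a DataStreamer method):

// Sketch: bump the generation stamp on a locally held ExtendedBlock.
// All literal values below are invented for illustration only.
static ExtendedBlock bumpGenerationStamp() {
  ExtendedBlock blk = new ExtendedBlock("BP-1234-127.0.0.1-1", 1073741825L, 512L, 1001L);
  long newGS = 1002L;            // new generation stamp issued by the namenode
  blk.setGenerationStamp(newGS); // local state now matches what the datanodes accepted
  return blk;
}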
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class PBHelper, method convertBlockECReconstructionInfo.
public static BlockECReconstructionInfo convertBlockECReconstructionInfo(
    BlockECReconstructionInfoProto blockEcReconstructionInfoProto) {
  ExtendedBlockProto blockProto = blockEcReconstructionInfoProto.getBlock();
  ExtendedBlock block = PBHelperClient.convert(blockProto);
  DatanodeInfosProto sourceDnInfosProto =
      blockEcReconstructionInfoProto.getSourceDnInfos();
  DatanodeInfo[] sourceDnInfos = PBHelperClient.convert(sourceDnInfosProto);
  DatanodeInfosProto targetDnInfosProto =
      blockEcReconstructionInfoProto.getTargetDnInfos();
  DatanodeInfo[] targetDnInfos = PBHelperClient.convert(targetDnInfosProto);
  HdfsProtos.StorageUuidsProto targetStorageUuidsProto =
      blockEcReconstructionInfoProto.getTargetStorageUuids();
  String[] targetStorageUuids = convert(targetStorageUuidsProto);
  StorageTypesProto targetStorageTypesProto =
      blockEcReconstructionInfoProto.getTargetStorageTypes();
  StorageType[] convertStorageTypes = PBHelperClient.convertStorageTypes(
      targetStorageTypesProto.getStorageTypesList(),
      targetStorageTypesProto.getStorageTypesList().size());
  byte[] liveBlkIndices =
      blockEcReconstructionInfoProto.getLiveBlockIndices().toByteArray();
  ErasureCodingPolicy ecPolicy = PBHelperClient.convertErasureCodingPolicy(
      blockEcReconstructionInfoProto.getEcPolicy());
  return new BlockECReconstructionInfo(block, sourceDnInfos, targetDnInfos,
      targetStorageUuids, convertStorageTypes, liveBlkIndices, ecPolicy);
}
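The block field above goes through the same PBHelperClient.convert overloads used elsewhere for ExtendedBlock, and the conversion runs in both directions, so a block can be round-tripped through its protobuf form. A minimal sketch under that assumption (the block values are made up, and the method name is not part of PBHelper):

// Sketch: round-trip an ExtendedBlock through ExtendedBlockProto using the
// PBHelperClient.convert overloads; the literal values are invented.
static void roundTripExtendedBlock() {
  ExtendedBlock original = new ExtendedBlock("BP-1234-127.0.0.1-1", 1073741825L, 512L, 1001L);
  ExtendedBlockProto proto = PBHelperClient.convert(original); // HDFS object -> protobuf
  ExtendedBlock back = PBHelperClient.convert(proto);          // protobuf -> HDFS object
  assert original.equals(back);
}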
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class NamenodeFsck, method copyBlock.
/*
* XXX (ab) Bulk of this method is copied verbatim from {@link DFSClient}, which is
* bad. Both places should be refactored to provide a method to copy blocks
* around.
*/
private void copyBlock(final DFSClient dfs, LocatedBlock lblock,
    OutputStream fos) throws Exception {
  int failures = 0;
  InetSocketAddress targetAddr = null;
  TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
  BlockReader blockReader = null;
  ExtendedBlock block = lblock.getBlock();
  while (blockReader == null) {
    DatanodeInfo chosenNode;
    try {
      chosenNode = bestNode(dfs, lblock.getLocations(), deadNodes);
      targetAddr = NetUtils.createSocketAddr(chosenNode.getXferAddr());
    } catch (IOException ie) {
      if (failures >= HdfsClientConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT) {
        throw new IOException("Could not obtain block " + lblock, ie);
      }
      LOG.info("Could not obtain block from any node: " + ie);
      try {
        Thread.sleep(10000);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear();
      failures++;
      continue;
    }
    try {
      String file = BlockReaderFactory.getFileName(targetAddr,
          block.getBlockPoolId(), block.getBlockId());
      blockReader = new BlockReaderFactory(dfs.getConf())
          .setFileName(file)
          .setBlock(block)
          .setBlockToken(lblock.getBlockToken())
          .setStartOffset(0)
          .setLength(block.getNumBytes())
          .setVerifyChecksum(true)
          .setClientName("fsck")
          .setDatanodeInfo(chosenNode)
          .setInetSocketAddress(targetAddr)
          .setCachingStrategy(CachingStrategy.newDropBehind())
          .setClientCacheContext(dfs.getClientContext())
          .setConfiguration(namenode.getConf())
          .setTracer(tracer)
          .setRemotePeerFactory(new RemotePeerFactory() {
            @Override
            public Peer newConnectedPeer(InetSocketAddress addr,
                Token<BlockTokenIdentifier> blockToken, DatanodeID datanodeId)
                throws IOException {
              Peer peer = null;
              Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
              try {
                s.connect(addr, HdfsConstants.READ_TIMEOUT);
                s.setSoTimeout(HdfsConstants.READ_TIMEOUT);
                peer = DFSUtilClient.peerFromSocketAndKey(
                    dfs.getSaslDataTransferClient(), s, NamenodeFsck.this,
                    blockToken, datanodeId, HdfsConstants.READ_TIMEOUT);
              } finally {
                if (peer == null) {
                  IOUtils.closeQuietly(s);
                }
              }
              return peer;
            }
          })
          .build();
    } catch (IOException ex) {
      // Put chosen node into dead list, continue
      LOG.info("Failed to connect to " + targetAddr + ":" + ex);
      deadNodes.add(chosenNode);
    }
  }
  byte[] buf = new byte[1024];
  int cnt = 0;
  boolean success = true;
  long bytesRead = 0;
  try {
    while ((cnt = blockReader.read(buf, 0, buf.length)) > 0) {
      fos.write(buf, 0, cnt);
      bytesRead += cnt;
    }
    if (bytesRead != block.getNumBytes()) {
      throw new IOException("Recorded block size is " + block.getNumBytes()
          + ", but datanode returned " + bytesRead + " bytes");
    }
  } catch (Exception e) {
    LOG.error("Error reading block", e);
    success = false;
  } finally {
    blockReader.close();
  }
  if (!success) {
    throw new Exception("Could not copy block data for " + lblock.getBlock());
  }
}
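The read loop at the end of copyBlock is self-contained enough to be expressed as a helper that drains a BlockReader into a stream and validates the byte count against the block's recorded size. A minimal sketch under that reading (copyAndVerify is a hypothetical name, not part of NamenodeFsck):

// Sketch: drain a BlockReader into an OutputStream and verify the number of
// bytes copied, mirroring the loop above. Helper name is hypothetical.
static long copyAndVerify(BlockReader reader, OutputStream out, long expectedBytes)
    throws IOException {
  byte[] buf = new byte[1024];
  long bytesRead = 0;
  int cnt;
  while ((cnt = reader.read(buf, 0, buf.length)) > 0) {
    out.write(buf, 0, cnt);
    bytesRead += cnt;
  }
  if (bytesRead != expectedBytes) {
    throw new IOException("Expected " + expectedBytes
        + " bytes but copied " + bytesRead);
  }
  return bytesRead;
}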
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class TestDecommission, method testRecommission.
/**
* Test that over-replicated blocks are deleted on recommission.
*/
@Test(timeout = 120000)
public void testRecommission() throws Exception {
  final int numDatanodes = 6;
  try {
    LOG.info("Starting test testRecommission");
    startCluster(1, numDatanodes);
    final Path file1 = new Path("testDecommission.dat");
    final int replicas = numDatanodes - 1;
    ArrayList<DatanodeInfo> decommissionedNodes = Lists.newArrayList();
    final FileSystem fileSys = getCluster().getFileSystem();
    // Write a file to n-1 datanodes
    writeFile(fileSys, file1, replicas);
    // Decommission one of the datanodes with a replica
    BlockLocation loc = fileSys.getFileBlockLocations(file1, 0, 1)[0];
    assertEquals("Unexpected number of replicas from getFileBlockLocations",
        replicas, loc.getHosts().length);
    final String toDecomHost = loc.getNames()[0];
    String toDecomUuid = null;
    for (DataNode d : getCluster().getDataNodes()) {
      if (d.getDatanodeId().getXferAddr().equals(toDecomHost)) {
        toDecomUuid = d.getDatanodeId().getDatanodeUuid();
        break;
      }
    }
    assertNotNull("Could not find a dn with the block!", toDecomUuid);
    final DatanodeInfo decomNode = takeNodeOutofService(0, toDecomUuid, 0,
        decommissionedNodes, AdminStates.DECOMMISSIONED);
    decommissionedNodes.add(decomNode);
    final BlockManager blockManager =
        getCluster().getNamesystem().getBlockManager();
    final DatanodeManager datanodeManager = blockManager.getDatanodeManager();
    BlockManagerTestUtil.recheckDecommissionState(datanodeManager);
    // Ensure the decommissioned datanode is not automatically shut down
    DFSClient client = getDfsClient(0);
    assertEquals("All datanodes must be alive", numDatanodes,
        client.datanodeReport(DatanodeReportType.LIVE).length);
    // Wait for the block to be replicated onto the remaining nodes
    final ExtendedBlock b = DFSTestUtil.getFirstBlock(fileSys, file1);
    final String uuid = toDecomUuid;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        BlockInfo info = blockManager.getStoredBlock(b.getLocalBlock());
        int count = 0;
        StringBuilder sb = new StringBuilder("Replica locations: ");
        for (int i = 0; i < info.numNodes(); i++) {
          DatanodeDescriptor dn = info.getDatanode(i);
          sb.append(dn + ", ");
          if (!dn.getDatanodeUuid().equals(uuid)) {
            count++;
          }
        }
        LOG.info(sb.toString());
        LOG.info("Count: " + count);
        return count == replicas;
      }
    }, 500, 30000);
    // Recommission the node and wait for the over-replication to be fixed
    putNodeInService(0, decomNode);
    BlockManagerTestUtil.recheckDecommissionState(datanodeManager);
    DFSTestUtil.waitForReplication(getCluster(), b, 1, replicas, 0);
    cleanupFile(fileSys, file1);
  } finally {
    shutdownCluster();
  }
}
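The anonymous Supplier passed to GenericTestUtils.waitFor is the only place the test inspects replica locations, and it is where the ExtendedBlock is mapped back to the namenode's stored block via getLocalBlock(). On Java 8+ the same wait could be written more compactly as a lambda; a sketch, assuming the surrounding test variables (blockManager, b, uuid, replicas) are in scope:

// Sketch: the same replication wait expressed as a lambda; semantics match
// the anonymous Supplier above, minus the debug logging.
GenericTestUtils.waitFor(() -> {
  BlockInfo info = blockManager.getStoredBlock(b.getLocalBlock());
  int count = 0;
  for (int i = 0; i < info.numNodes(); i++) {
    if (!info.getDatanode(i).getDatanodeUuid().equals(uuid)) {
      count++;
    }
  }
  return count == replicas;
}, 500, 30000);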