use of org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature in project hadoop by apache.
the class FSDirTruncateOp method prepareFileForTruncate.
/**
* Convert current INode to UnderConstruction. Recreate lease. Create new
* block for the truncated copy. Schedule truncation of the replicas.
*
* @param fsn namespace
* @param iip inodes in the path containing the file
* @param leaseHolder lease holder
* @param clientMachine client machine info
* @param lastBlockDelta last block delta size
* @param newBlock new block
* @return the returned block will be written to editLog and passed back
* into this method upon loading.
* @throws IOException
*/
@VisibleForTesting
static Block prepareFileForTruncate(FSNamesystem fsn, INodesInPath iip, String leaseHolder, String clientMachine, long lastBlockDelta, Block newBlock) throws IOException {
assert fsn.hasWriteLock();
INodeFile file = iip.getLastINode().asFile();
assert !file.isStriped();
file.recordModification(iip.getLatestSnapshotId());
file.toUnderConstruction(leaseHolder, clientMachine);
assert file.isUnderConstruction() : "inode should be under construction.";
fsn.getLeaseManager().addLease(file.getFileUnderConstructionFeature().getClientName(), file.getId());
boolean shouldRecoverNow = (newBlock == null);
BlockInfo oldBlock = file.getLastBlock();
boolean shouldCopyOnTruncate = shouldCopyOnTruncate(fsn, file, oldBlock);
if (newBlock == null) {
newBlock = (shouldCopyOnTruncate) ? fsn.createNewBlock(BlockType.CONTIGUOUS) : new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(), fsn.nextGenerationStamp(fsn.getBlockManager().isLegacyBlock(oldBlock)));
}
final BlockInfo truncatedBlockUC;
BlockManager blockManager = fsn.getFSDirectory().getBlockManager();
if (shouldCopyOnTruncate) {
// Add new truncateBlock into blocksMap and
// use oldBlock as a source for copy-on-truncate recovery
truncatedBlockUC = new BlockInfoContiguous(newBlock, file.getPreferredBlockReplication());
truncatedBlockUC.convertToBlockUnderConstruction(BlockUCState.UNDER_CONSTRUCTION, blockManager.getStorages(oldBlock));
truncatedBlockUC.setNumBytes(oldBlock.getNumBytes() - lastBlockDelta);
truncatedBlockUC.getUnderConstructionFeature().setTruncateBlock(oldBlock);
file.setLastBlock(truncatedBlockUC);
blockManager.addBlockCollection(truncatedBlockUC, file);
NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: Scheduling copy-on-truncate to new" + " size {} new block {} old block {}", truncatedBlockUC.getNumBytes(), newBlock, oldBlock);
} else {
// Use new generation stamp for in-place truncate recovery
blockManager.convertLastBlockToUnderConstruction(file, lastBlockDelta);
oldBlock = file.getLastBlock();
assert !oldBlock.isComplete() : "oldBlock should be under construction";
BlockUnderConstructionFeature uc = oldBlock.getUnderConstructionFeature();
uc.setTruncateBlock(new Block(oldBlock));
uc.getTruncateBlock().setNumBytes(oldBlock.getNumBytes() - lastBlockDelta);
uc.getTruncateBlock().setGenerationStamp(newBlock.getGenerationStamp());
truncatedBlockUC = oldBlock;
NameNode.stateChangeLog.debug("BLOCK* prepareFileForTruncate: " + "{} Scheduling in-place block truncate to new size {}", uc, uc.getTruncateBlock().getNumBytes());
}
if (shouldRecoverNow) {
truncatedBlockUC.getUnderConstructionFeature().initializeBlockRecovery(truncatedBlockUC, newBlock.getGenerationStamp());
}
return newBlock;
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature in project hadoop by apache.
the class FSDirWriteFileOp method logAllocatedBlock.
private static void logAllocatedBlock(String src, BlockInfo b) {
if (!NameNode.stateChangeLog.isInfoEnabled()) {
return;
}
StringBuilder sb = new StringBuilder(150);
sb.append("BLOCK* allocate ");
b.appendStringTo(sb);
sb.append(", ");
BlockUnderConstructionFeature uc = b.getUnderConstructionFeature();
if (uc != null) {
uc.appendUCPartsConcise(sb);
}
sb.append(" for " + src);
NameNode.stateChangeLog.info(sb.toString());
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature in project hadoop by apache.
the class FSNamesystem method internalReleaseLease.
/**
* Move a file that is being written to be immutable.
* @param src The filename
* @param lease The lease for the client creating the file
* @param recoveryLeaseHolder reassign lease to this holder if the last block
* needs recovery; keep current holder if null.
* @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
* replication;<br>
* RecoveryInProgressException if lease recovery is in progress.<br>
* IOException in case of an error.
* @return true if file has been successfully finalized and closed or
* false if block recovery has been initiated. Since the lease owner
* has been changed and logged, caller should call logSync().
*/
boolean internalReleaseLease(Lease lease, String src, INodesInPath iip, String recoveryLeaseHolder) throws IOException {
LOG.info("Recovering " + lease + ", src=" + src);
assert !isInSafeMode();
assert hasWriteLock();
final INodeFile pendingFile = iip.getLastINode().asFile();
int nrBlocks = pendingFile.numBlocks();
BlockInfo[] blocks = pendingFile.getBlocks();
int nrCompleteBlocks;
BlockInfo curBlock = null;
for (nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
curBlock = blocks[nrCompleteBlocks];
if (!curBlock.isComplete())
break;
assert blockManager.hasMinStorage(curBlock) : "A COMPLETE block is not minimally replicated in " + src;
}
// then reap lease immediately and close the file.
if (nrCompleteBlocks == nrBlocks) {
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*" + " internalReleaseLease: All existing blocks are COMPLETE," + " lease removed, file " + src + " closed.");
// closed!
return true;
}
// If the penultimate block is not COMPLETE, then it must be COMMITTED.
if (nrCompleteBlocks < nrBlocks - 2 || nrCompleteBlocks == nrBlocks - 2 && curBlock != null && curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
final String message = "DIR* NameSystem.internalReleaseLease: " + "attempt to release a create lock on " + src + " but file is already closed.";
NameNode.stateChangeLog.warn(message);
throw new IOException(message);
}
// The last block is not COMPLETE, and
// that the penultimate block if exists is either COMPLETE or COMMITTED
final BlockInfo lastBlock = pendingFile.getLastBlock();
BlockUCState lastBlockState = lastBlock.getBlockUCState();
BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
// If penultimate block doesn't exist then its minReplication is met
boolean penultimateBlockMinStorage = penultimateBlock == null || blockManager.hasMinStorage(penultimateBlock);
switch(lastBlockState) {
case COMPLETE:
assert false : "Already checked that the last block is incomplete";
break;
case COMMITTED:
// Close file if committed blocks are minimally replicated
if (penultimateBlockMinStorage && blockManager.hasMinStorage(lastBlock)) {
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*" + " internalReleaseLease: Committed blocks are minimally" + " replicated, lease removed, file" + src + " closed.");
// closed!
return true;
}
// Cannot close file right now, since some blocks
// are not yet minimally replicated.
// This may potentially cause infinite loop in lease recovery
// if there are no valid replicas on data-nodes.
String message = "DIR* NameSystem.internalReleaseLease: " + "Failed to release lease for file " + src + ". Committed blocks are waiting to be minimally replicated." + " Try again later.";
NameNode.stateChangeLog.warn(message);
throw new AlreadyBeingCreatedException(message);
case UNDER_CONSTRUCTION:
case UNDER_RECOVERY:
BlockUnderConstructionFeature uc = lastBlock.getUnderConstructionFeature();
// determine if last block was intended to be truncated
Block recoveryBlock = uc.getTruncateBlock();
boolean truncateRecovery = recoveryBlock != null;
boolean copyOnTruncate = truncateRecovery && recoveryBlock.getBlockId() != lastBlock.getBlockId();
assert !copyOnTruncate || recoveryBlock.getBlockId() < lastBlock.getBlockId() && recoveryBlock.getGenerationStamp() < lastBlock.getGenerationStamp() && recoveryBlock.getNumBytes() > lastBlock.getNumBytes() : "wrong recoveryBlock";
// setup the last block locations from the blockManager if not known
if (uc.getNumExpectedLocations() == 0) {
uc.setExpectedLocations(lastBlock, blockManager.getStorages(lastBlock), lastBlock.getBlockType());
}
if (uc.getNumExpectedLocations() == 0 && lastBlock.getNumBytes() == 0) {
// There is no datanode reported to this block.
// may be client have crashed before writing data to pipeline.
// This blocks doesn't need any recovery.
// We can remove this block and close the file.
pendingFile.removeLastBlock(lastBlock);
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: " + "Removed empty last block and closed file " + src);
return true;
}
// start recovery of the last block for this file
long blockRecoveryId = nextGenerationStamp(blockManager.isLegacyBlock(lastBlock));
lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
if (copyOnTruncate) {
lastBlock.setGenerationStamp(blockRecoveryId);
} else if (truncateRecovery) {
recoveryBlock.setGenerationStamp(blockRecoveryId);
}
uc.initializeBlockRecovery(lastBlock, blockRecoveryId);
leaseManager.renewLease(lease);
// Cannot close file right now, since the last block requires recovery.
// This may potentially cause infinite loop in lease recovery
// if there are no valid replicas on data-nodes.
NameNode.stateChangeLog.warn("DIR* NameSystem.internalReleaseLease: " + "File " + src + " has not been closed." + " Lease recovery is in progress. " + "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
break;
}
return false;
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockUnderConstructionFeature in project hadoop by apache.
the class FSDirWriteFileOp method analyzeFileState.
private static FileState analyzeFileState(FSNamesystem fsn, INodesInPath iip, long fileId, String clientName, ExtendedBlock previous, LocatedBlock[] onRetryBlock) throws IOException {
assert fsn.hasReadLock();
String src = iip.getPath();
checkBlock(fsn, previous);
onRetryBlock[0] = null;
fsn.checkNameNodeSafeMode("Cannot add block to " + src);
// have we exceeded the configured limit of fs objects.
fsn.checkFsObjectLimit();
Block previousBlock = ExtendedBlock.getLocalBlock(previous);
final INodeFile file = fsn.checkLease(iip, clientName, fileId);
BlockInfo lastBlockInFile = file.getLastBlock();
if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
// The block that the client claims is the current last block
// doesn't match up with what we think is the last block. There are
// four possibilities:
// 1) This is the first block allocation of an append() pipeline
// which started appending exactly at or exceeding the block boundary.
// In this case, the client isn't passed the previous block,
// so it makes the allocateBlock() call with previous=null.
// We can distinguish this since the last block of the file
// will be exactly a full block.
// 2) This is a retry from a client that missed the response of a
// prior getAdditionalBlock() call, perhaps because of a network
// timeout, or because of an HA failover. In that case, we know
// by the fact that the client is re-issuing the RPC that it
// never began to write to the old block. Hence it is safe to
// to return the existing block.
// 3) This is an entirely bogus request/bug -- we should error out
// rather than potentially appending a new block with an empty
// one in the middle, etc
// 4) This is a retry from a client that timed out while
// the prior getAdditionalBlock() is still being processed,
// currently working on chooseTarget().
// There are no means to distinguish between the first and
// the second attempts in Part I, because the first one hasn't
// changed the namesystem state yet.
// We run this analysis again in Part II where case 4 is impossible.
BlockInfo penultimateBlock = file.getPenultimateBlock();
if (previous == null && lastBlockInFile != null && lastBlockInFile.getNumBytes() >= file.getPreferredBlockSize() && lastBlockInFile.isComplete()) {
// Case 1
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("BLOCK* NameSystem.allocateBlock: handling block allocation" + " writing to a file with a complete previous block: src=" + src + " lastBlock=" + lastBlockInFile);
}
} else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
if (lastBlockInFile.getNumBytes() != 0) {
throw new IOException("Request looked like a retry to allocate block " + lastBlockInFile + " but it already contains " + lastBlockInFile.getNumBytes() + " bytes");
}
// Case 2
// Return the last block.
NameNode.stateChangeLog.info("BLOCK* allocateBlock: caught retry for " + "allocation of a new block in " + src + ". Returning previously" + " allocated block " + lastBlockInFile);
long offset = file.computeFileSize();
BlockUnderConstructionFeature uc = lastBlockInFile.getUnderConstructionFeature();
onRetryBlock[0] = makeLocatedBlock(fsn, lastBlockInFile, uc.getExpectedStorageLocations(), offset);
return new FileState(file, src, iip);
} else {
// Case 3
throw new IOException("Cannot allocate block in " + src + ": " + "passed 'previous' block " + previous + " does not match actual " + "last block in file " + lastBlockInFile);
}
}
return new FileState(file, src, iip);
}
Aggregations