Use of org.apache.hadoop.hdfs.protocol.Block in project hadoop by apache.
The class FSNamesystem, method internalReleaseLease.
/**
* Move a file that is being written to be immutable.
* @param src The filename
* @param lease The lease for the client creating the file
* @param recoveryLeaseHolder reassign lease to this holder if the last block
* needs recovery; keep current holder if null.
* @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
* replication;<br>
* RecoveryInProgressException if lease recovery is in progress.<br>
* IOException in case of an error.
* @return true if file has been successfully finalized and closed or
* false if block recovery has been initiated. Since the lease owner
* has been changed and logged, caller should call logSync().
*/
boolean internalReleaseLease(Lease lease, String src, INodesInPath iip, String recoveryLeaseHolder) throws IOException {
LOG.info("Recovering " + lease + ", src=" + src);
assert !isInSafeMode();
assert hasWriteLock();
final INodeFile pendingFile = iip.getLastINode().asFile();
int nrBlocks = pendingFile.numBlocks();
BlockInfo[] blocks = pendingFile.getBlocks();
int nrCompleteBlocks;
BlockInfo curBlock = null;
for (nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
curBlock = blocks[nrCompleteBlocks];
if (!curBlock.isComplete())
break;
assert blockManager.hasMinStorage(curBlock) : "A COMPLETE block is not minimally replicated in " + src;
}
// If there are no incomplete blocks associated with the lease,
// then reap the lease immediately and close the file.
if (nrCompleteBlocks == nrBlocks) {
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*" + " internalReleaseLease: All existing blocks are COMPLETE," + " lease removed, file " + src + " closed.");
// closed!
return true;
}
// Only the last and the penultimate blocks may be in a non-COMPLETE state.
// If the penultimate block is not COMPLETE, then it must be COMMITTED.
if (nrCompleteBlocks < nrBlocks - 2 || nrCompleteBlocks == nrBlocks - 2 && curBlock != null && curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
final String message = "DIR* NameSystem.internalReleaseLease: " + "attempt to release a create lock on " + src + " but file is already closed.";
NameNode.stateChangeLog.warn(message);
throw new IOException(message);
}
// The last block is not COMPLETE, and
// the penultimate block, if it exists, is either COMPLETE or COMMITTED.
final BlockInfo lastBlock = pendingFile.getLastBlock();
BlockUCState lastBlockState = lastBlock.getBlockUCState();
BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
// If the penultimate block doesn't exist, then its minReplication requirement is met.
boolean penultimateBlockMinStorage = penultimateBlock == null || blockManager.hasMinStorage(penultimateBlock);
switch(lastBlockState) {
case COMPLETE:
assert false : "Already checked that the last block is incomplete";
break;
case COMMITTED:
// Close file if committed blocks are minimally replicated
if (penultimateBlockMinStorage && blockManager.hasMinStorage(lastBlock)) {
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*" + " internalReleaseLease: Committed blocks are minimally" + " replicated, lease removed, file" + src + " closed.");
// closed!
return true;
}
// Cannot close file right now, since some blocks
// are not yet minimally replicated.
// This may potentially cause infinite loop in lease recovery
// if there are no valid replicas on data-nodes.
String message = "DIR* NameSystem.internalReleaseLease: " + "Failed to release lease for file " + src + ". Committed blocks are waiting to be minimally replicated." + " Try again later.";
NameNode.stateChangeLog.warn(message);
throw new AlreadyBeingCreatedException(message);
case UNDER_CONSTRUCTION:
case UNDER_RECOVERY:
BlockUnderConstructionFeature uc = lastBlock.getUnderConstructionFeature();
// determine if last block was intended to be truncated
Block recoveryBlock = uc.getTruncateBlock();
boolean truncateRecovery = recoveryBlock != null;
boolean copyOnTruncate = truncateRecovery && recoveryBlock.getBlockId() != lastBlock.getBlockId();
assert !copyOnTruncate || recoveryBlock.getBlockId() < lastBlock.getBlockId() && recoveryBlock.getGenerationStamp() < lastBlock.getGenerationStamp() && recoveryBlock.getNumBytes() > lastBlock.getNumBytes() : "wrong recoveryBlock";
// setup the last block locations from the blockManager if not known
if (uc.getNumExpectedLocations() == 0) {
uc.setExpectedLocations(lastBlock, blockManager.getStorages(lastBlock), lastBlock.getBlockType());
}
if (uc.getNumExpectedLocations() == 0 && lastBlock.getNumBytes() == 0) {
// No datanode has reported a replica for this block.
// Maybe the client crashed before writing any data to the pipeline.
// This block doesn't need any recovery.
// We can remove this block and close the file.
pendingFile.removeLastBlock(lastBlock);
finalizeINodeFileUnderConstruction(src, pendingFile, iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: " + "Removed empty last block and closed file " + src);
return true;
}
// start recovery of the last block for this file
long blockRecoveryId = nextGenerationStamp(blockManager.isLegacyBlock(lastBlock));
lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
if (copyOnTruncate) {
lastBlock.setGenerationStamp(blockRecoveryId);
} else if (truncateRecovery) {
recoveryBlock.setGenerationStamp(blockRecoveryId);
}
uc.initializeBlockRecovery(lastBlock, blockRecoveryId);
leaseManager.renewLease(lease);
// Cannot close file right now, since the last block requires recovery.
// This may potentially cause infinite loop in lease recovery
// if there are no valid replicas on data-nodes.
NameNode.stateChangeLog.warn("DIR* NameSystem.internalReleaseLease: " + "File " + src + " has not been closed." + " Lease recovery is in progress. " + "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
break;
}
return false;
}
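For context, here is a hedged sketch of how a caller would typically consume the boolean result and the Javadoc contract above. This is illustrative only, not the actual Hadoop caller: internalReleaseLease is package-private, so the sketch is assumed to live in the same package, and it mirrors the general pattern of FSNamesystem's own lease-recovery paths (take the write lock, then sync the edit log outside it when recovery was initiated).
// Illustrative caller sketch only, not verbatim Hadoop source.
boolean releaseLeaseAndSync(FSNamesystem fsn, Lease lease, String src, INodesInPath iip) throws IOException {
  boolean completed;
  fsn.writeLock();
  try {
    // null holder: keep the current lease holder unless recovery reassigns it
    completed = fsn.internalReleaseLease(lease, src, iip, null);
  } finally {
    fsn.writeUnlock();
  }
  if (!completed) {
    // Recovery is in progress; the lease owner may have been reassigned and
    // logged, so persist that edit as the Javadoc requires.
    fsn.getEditLog().logSync();
  }
  return completed;
}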
Use of org.apache.hadoop.hdfs.protocol.Block in project hadoop by apache.
The class FSImageSerialization, method readINodeUnderConstruction.
// Helper function that reads in an INodeUnderConstruction
// from the input stream
//
static INodeFile readINodeUnderConstruction(DataInput in, FSNamesystem fsNamesys, int imgVersion) throws IOException {
byte[] name = readBytes(in);
long inodeId = NameNodeLayoutVersion.supports(LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong() : fsNamesys.dir.allocateNewInodeId();
short blockReplication = in.readShort();
long modificationTime = in.readLong();
long preferredBlockSize = in.readLong();
int numBlocks = in.readInt();
final BlockInfoContiguous[] blocksContiguous = new BlockInfoContiguous[numBlocks];
Block blk = new Block();
int i = 0;
for (; i < numBlocks - 1; i++) {
blk.readFields(in);
blocksContiguous[i] = new BlockInfoContiguous(blk, blockReplication);
}
// last block is UNDER_CONSTRUCTION
if (numBlocks > 0) {
blk.readFields(in);
blocksContiguous[i] = new BlockInfoContiguous(blk, blockReplication);
blocksContiguous[i].convertToBlockUnderConstruction(BlockUCState.UNDER_CONSTRUCTION, null);
}
PermissionStatus perm = PermissionStatus.read(in);
String clientName = readString(in);
String clientMachine = readString(in);
// We previously stored locations for the last block, now we
// just record that there are none
int numLocs = in.readInt();
assert numLocs == 0 : "Unexpected block locations";
// Images in the pre-protobuf format will not have the lazyPersist flag,
// so it is safe to pass false always.
INodeFile file = new INodeFile(inodeId, name, perm, modificationTime, modificationTime, blocksContiguous, blockReplication, preferredBlockSize);
file.toUnderConstruction(clientName, clientMachine);
return file;
}
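The read order above fully describes the on-disk record for an under-construction INode. The following mirror-image writer is a sketch derived only from that read order; it is not the actual FSImageSerialization writer, and writeBytes/writeString are assumed helpers symmetric to the readBytes/readString calls used by the reader.
// Writer sketch, for documentation of the record layout only.
static void writeINodeUnderConstructionSketch(DataOutput out, byte[] name, long inodeId, short replication, long mtime, long preferredBlockSize, Block[] blocks, PermissionStatus perm, String clientName, String clientMachine) throws IOException {
  // file name, then the inode id (only present when ADD_INODE_ID is supported)
  writeBytes(name, out);
  out.writeLong(inodeId);
  out.writeShort(replication);
  out.writeLong(mtime);
  out.writeLong(preferredBlockSize);
  // block list; the reader treats the last entry as UNDER_CONSTRUCTION
  out.writeInt(blocks.length);
  for (Block blk : blocks) {
    // Block implements Writable
    blk.write(out);
  }
  perm.write(out);
  writeString(clientName, out);
  writeString(clientMachine, out);
  // no locations are stored for the last block
  out.writeInt(0);
}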
Use of org.apache.hadoop.hdfs.protocol.Block in project hadoop by apache.
The class FSImageSerialization, method readCompactBlockArray.
public static Block[] readCompactBlockArray(DataInput in, int logVersion) throws IOException {
int num = WritableUtils.readVInt(in);
if (num < 0) {
throw new IOException("Invalid block array length: " + num);
}
Block prev = null;
Block[] ret = new Block[num];
for (int i = 0; i < num; i++) {
long id = in.readLong();
long sz = WritableUtils.readVLong(in) + ((prev != null) ? prev.getNumBytes() : 0);
long gs = WritableUtils.readVLong(in) + ((prev != null) ? prev.getGenerationStamp() : 0);
ret[i] = new Block(id, sz, gs);
prev = ret[i];
}
return ret;
}
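The reader above decodes block sizes and generation stamps as deltas against the previous block, which keeps the vlongs small for files whose blocks share a size and generation stamp. The matching encoder below is inferred from that decoding; it is a hedged sketch, and the real FSImageSerialization writer may differ in detail.
public static void writeCompactBlockArraySketch(Block[] blocks, DataOutput out) throws IOException {
  WritableUtils.writeVInt(out, blocks.length);
  Block prev = null;
  for (Block b : blocks) {
    // sizes and generation stamps are stored as deltas against the previous block
    long szDelta = b.getNumBytes() - ((prev != null) ? prev.getNumBytes() : 0);
    long gsDelta = b.getGenerationStamp() - ((prev != null) ? prev.getGenerationStamp() : 0);
    out.writeLong(b.getBlockId());
    WritableUtils.writeVLong(out, szDelta);
    WritableUtils.writeVLong(out, gsDelta);
    prev = b;
  }
}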
Use of org.apache.hadoop.hdfs.protocol.Block in project hadoop by apache.
The class TestGetBlocks, method testBlockKey.
@Test
public void testBlockKey() {
Map<Block, Long> map = new HashMap<Block, Long>();
final Random RAN = new Random();
final long seed = RAN.nextLong();
System.out.println("seed=" + seed);
RAN.setSeed(seed);
long[] blkids = new long[10];
for (int i = 0; i < blkids.length; i++) {
blkids[i] = 1000L + RAN.nextInt(100000);
map.put(new Block(blkids[i], 0, blkids[i]), blkids[i]);
}
System.out.println("map=" + map.toString().replace(",", "\n "));
for (int i = 0; i < blkids.length; i++) {
Block b = new Block(blkids[i], 0, HdfsConstants.GRANDFATHER_GENERATION_STAMP);
Long v = map.get(b);
System.out.println(b + " => " + v);
assertEquals(blkids[i], v.longValue());
}
}
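The lookup with GRANDFATHER_GENERATION_STAMP succeeds because a Block used as a HashMap key is identified by its block ID alone, not by its length or generation stamp; that is exactly what the assertions above verify. A minimal standalone sketch of the same property, assuming only the Block constructor already used in the test:
Block stored = new Block(1001L, 0, 1001L);
Block probe = new Block(1001L, 0, HdfsConstants.GRANDFATHER_GENERATION_STAMP);
// Same block ID, different generation stamps: still the same map key, so a
// HashMap.get(probe) returns the value stored under "stored".
System.out.println(stored.equals(probe) && stored.hashCode() == probe.hashCode());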
Use of org.apache.hadoop.hdfs.protocol.Block in project hadoop by apache.
The class TestInjectionForSimulatedStorage, method testInjection.
/* This test makes sure that the NameNode retries all the available blocks
 * for under-replicated blocks. This test uses simulated storage and one
 * of its features to inject blocks.
 *
 * It creates a file with several blocks and a replication factor of 4.
 * The cluster is then shut down - the NN retains its state but the DNs are
 * all simulated and hence lose their blocks.
 * The blocks are then injected into one of the DNs. The expected behaviour is
 * that the NN will arrange for the missing replicas to be copied from a valid source.
 */
@Test
public void testInjection() throws IOException {
MiniDFSCluster cluster = null;
String testFile = "/replication-test-file";
Path testPath = new Path(testFile);
byte[] buffer = new byte[1024];
for (int i = 0; i < buffer.length; i++) {
buffer[i] = '1';
}
try {
Configuration conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_REPLICATION_KEY, Integer.toString(numDataNodes));
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, checksumSize);
SimulatedFSDataset.setFactory(conf);
//first time format
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
cluster.waitActive();
String bpid = cluster.getNamesystem().getBlockPoolId();
DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
DFSTestUtil.createFile(cluster.getFileSystem(), testPath, filesize, filesize, blockSize, (short) numDataNodes, 0L);
waitForBlockReplication(testFile, dfsClient.getNamenode(), numDataNodes, 20);
List<Map<DatanodeStorage, BlockListAsLongs>> blocksList = cluster.getAllBlockReports(bpid);
cluster.shutdown();
cluster = null;
/* Start the MiniDFSCluster with more datanodes, since once a writeBlock
 * to a datanode fails, the same block cannot be written to it
 * immediately. In our case some replication attempts will fail.
 */
LOG.info("Restarting minicluster");
conf = new HdfsConfiguration();
SimulatedFSDataset.setFactory(conf);
conf.set(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "0.0f");
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes * 2).format(false).build();
cluster.waitActive();
Set<Block> uniqueBlocks = new HashSet<Block>();
for (Map<DatanodeStorage, BlockListAsLongs> map : blocksList) {
for (BlockListAsLongs blockList : map.values()) {
for (Block b : blockList) {
uniqueBlocks.add(new Block(b));
}
}
}
// Insert all the blocks in the first data node
LOG.info("Inserting " + uniqueBlocks.size() + " blocks");
cluster.injectBlocks(0, uniqueBlocks, null);
dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
waitForBlockReplication(testFile, dfsClient.getNamenode(), numDataNodes, -1);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
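waitForBlockReplication is a helper of this test class whose implementation is not shown here. The sketch below shows the shape such a helper typically has, and it is an assumption rather than the test's actual code: poll the NameNode's block locations until every block of the file reports at least the expected number of replicas, or fail after a timeout.
// Hedged sketch of a replication-wait helper; not the test's real waitForBlockReplication.
private static void waitForReplicationSketch(ClientProtocol namenode, String path, int expected, long timeoutMs) throws IOException, InterruptedException {
  long deadline = Time.monotonicNow() + timeoutMs;
  while (true) {
    // ask the NameNode for all block locations of the file
    LocatedBlocks blocks = namenode.getBlockLocations(path, 0, Long.MAX_VALUE);
    boolean done = true;
    for (LocatedBlock b : blocks.getLocatedBlocks()) {
      if (b.getLocations().length < expected) {
        done = false;
        break;
      }
    }
    if (done) {
      return;
    }
    if (Time.monotonicNow() > deadline) {
      throw new IOException("Timed out waiting for replication of " + path);
    }
    Thread.sleep(500);
  }
}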