Use of org.apache.hadoop.hdfs.server.datanode.ReplicaHandler in project hadoop by apache: class FsDatasetImpl, method createRbw.
@Override // FsDatasetSpi
public ReplicaHandler createRbw(StorageType storageType, ExtendedBlock b,
    boolean allowLazyPersist) throws IOException {
  try (AutoCloseableLock lock = datasetLock.acquire()) {
    ReplicaInfo replicaInfo = volumeMap.get(b.getBlockPoolId(), b.getBlockId());
    if (replicaInfo != null) {
      throw new ReplicaAlreadyExistsException("Block " + b +
          " already exists in state " + replicaInfo.getState() +
          " and thus cannot be created.");
    }
    // create a new block
    FsVolumeReference ref = null;
    // Use ramdisk only if block size is a multiple of OS page size.
    // This simplifies reservation for partially used replicas
    // significantly.
    if (allowLazyPersist && lazyWriter != null &&
        b.getNumBytes() % cacheManager.getOsPageSize() == 0 &&
        reserveLockedMemory(b.getNumBytes())) {
      try {
        // First try to place the block on a transient volume.
        ref = volumes.getNextTransientVolume(b.getNumBytes());
        datanode.getMetrics().incrRamDiskBlocksWrite();
      } catch (DiskOutOfSpaceException de) {
        // Ignore the exception since we just fall back to persistent storage.
      } finally {
        if (ref == null) {
          cacheManager.release(b.getNumBytes());
        }
      }
    }
    if (ref == null) {
      ref = volumes.getNextVolume(storageType, b.getNumBytes());
    }
    FsVolumeImpl v = (FsVolumeImpl) ref.getVolume();
    if (allowLazyPersist && !v.isTransientStorage()) {
      datanode.getMetrics().incrRamDiskBlocksWriteFallback();
    }
    ReplicaInPipeline newReplicaInfo;
    try {
      newReplicaInfo = v.createRbw(b);
      if (newReplicaInfo.getReplicaInfo().getState() != ReplicaState.RBW) {
        throw new IOException("CreateRBW returned a replica of state " +
            newReplicaInfo.getReplicaInfo().getState() + " for block " +
            b.getBlockId());
      }
    } catch (IOException e) {
      IOUtils.cleanup(null, ref);
      throw e;
    }
    volumeMap.add(b.getBlockPoolId(), newReplicaInfo.getReplicaInfo());
    return new ReplicaHandler(newReplicaInfo, ref);
  }
}
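A ReplicaHandler couples the new replica with the FsVolumeReference that was reserved for it, so the caller is expected to release that reference once the write finishes. A minimal caller sketch against the createRbw signature shown above (the dataset and block variables and the cleanup-in-finally shape are illustrative assumptions, not code copied from a real Hadoop caller such as BlockReceiver):

// Hypothetical caller: create an RBW replica, write through it, and make
// sure the handler (and with it the FsVolumeReference) is always released.
ReplicaHandler handler = dataset.createRbw(StorageType.DISK, block, true);
try {
  ReplicaInPipeline replica = handler.getReplica();
  // ... stream block data and checksums into the replica ...
} finally {
  // ReplicaHandler implements Closeable; closing it releases the
  // underlying volume reference obtained inside createRbw.
  handler.close();
}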
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaHandler in project hadoop by apache: class FsDatasetImpl, method recoverAppend.
@Override // FsDatasetSpi
public ReplicaHandler recoverAppend(ExtendedBlock b, long newGS,
    long expectedBlockLen) throws IOException {
  LOG.info("Recover failed append to " + b);
  while (true) {
    try {
      try (AutoCloseableLock lock = datasetLock.acquire()) {
        ReplicaInfo replicaInfo = recoverCheck(b, newGS, expectedBlockLen);
        FsVolumeReference ref = replicaInfo.getVolume().obtainReference();
        ReplicaInPipeline replica;
        try {
          // change the replica's state/gs etc.
          if (replicaInfo.getState() == ReplicaState.FINALIZED) {
            replica = append(b.getBlockPoolId(), replicaInfo, newGS, b.getNumBytes());
          } else {
            // RBW
            replicaInfo.bumpReplicaGS(newGS);
            replica = (ReplicaInPipeline) replicaInfo;
          }
        } catch (IOException e) {
          IOUtils.cleanup(null, ref);
          throw e;
        }
        return new ReplicaHandler(replica, ref);
      }
    } catch (MustStopExistingWriter e) {
      e.getReplicaInPipeline().stopWriter(datanode.getDnConf().getXceiverStopTimeout());
    }
  }
}
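The outer while (true) loop exists only to handle MustStopExistingWriter: recovery is attempted under the dataset lock, and if another thread still holds the replica open for writing, that writer is stopped outside the lock and the whole attempt is retried. The same idiom, reduced to a sketch (doRecoveryUnderLock and writerStopTimeoutMs are placeholders for this illustration, not Hadoop methods):

// Sketch of the stop-and-retry idiom used above.
while (true) {
  try {
    // Attempt the recovery while holding the dataset lock; this may throw
    // MustStopExistingWriter if a writer thread still owns the replica.
    return doRecoveryUnderLock();
  } catch (MustStopExistingWriter e) {
    // Interrupt and join the current writer outside the lock, then retry.
    e.getReplicaInPipeline().stopWriter(writerStopTimeoutMs);
  }
}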
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaHandler in project hadoop by apache: class FsDatasetImpl, method append.
@Override // FsDatasetSpi
public ReplicaHandler append(ExtendedBlock b, long newGS,
    long expectedBlockLen) throws IOException {
  try (AutoCloseableLock lock = datasetLock.acquire()) {
    // check the validity of the parameter
    if (newGS < b.getGenerationStamp()) {
      throw new IOException("The new generation stamp " + newGS +
          " should be greater than the replica " + b + "'s generation stamp");
    }
    ReplicaInfo replicaInfo = getReplicaInfo(b);
    LOG.info("Appending to " + replicaInfo);
    if (replicaInfo.getState() != ReplicaState.FINALIZED) {
      throw new ReplicaNotFoundException(
          ReplicaNotFoundException.UNFINALIZED_REPLICA + b);
    }
    if (replicaInfo.getNumBytes() != expectedBlockLen) {
      throw new IOException("Corrupted replica " + replicaInfo +
          " with a length of " + replicaInfo.getNumBytes() +
          " expected length is " + expectedBlockLen);
    }
    FsVolumeReference ref = replicaInfo.getVolume().obtainReference();
    ReplicaInPipeline replica = null;
    try {
      replica = append(b.getBlockPoolId(), replicaInfo, newGS, b.getNumBytes());
    } catch (IOException e) {
      IOUtils.cleanup(null, ref);
      throw e;
    }
    return new ReplicaHandler(replica, ref);
  }
}
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaHandler in project hadoop by apache: class FsDatasetImpl, method createTemporary.
@Override // FsDatasetSpi
public ReplicaHandler createTemporary(StorageType storageType,
    ExtendedBlock b) throws IOException {
  long startTimeMs = Time.monotonicNow();
  long writerStopTimeoutMs = datanode.getDnConf().getXceiverStopTimeout();
  ReplicaInfo lastFoundReplicaInfo = null;
  do {
    try (AutoCloseableLock lock = datasetLock.acquire()) {
      ReplicaInfo currentReplicaInfo = volumeMap.get(b.getBlockPoolId(), b.getBlockId());
      if (currentReplicaInfo == lastFoundReplicaInfo) {
        if (lastFoundReplicaInfo != null) {
          invalidate(b.getBlockPoolId(), new Block[] { lastFoundReplicaInfo });
        }
        FsVolumeReference ref = volumes.getNextVolume(storageType, b.getNumBytes());
        FsVolumeImpl v = (FsVolumeImpl) ref.getVolume();
        ReplicaInPipeline newReplicaInfo;
        try {
          newReplicaInfo = v.createTemporary(b);
        } catch (IOException e) {
          IOUtils.cleanup(null, ref);
          throw e;
        }
        volumeMap.add(b.getBlockPoolId(), newReplicaInfo.getReplicaInfo());
        return new ReplicaHandler(newReplicaInfo, ref);
      } else {
        if (!(currentReplicaInfo.getGenerationStamp() < b.getGenerationStamp() &&
            (currentReplicaInfo.getState() == ReplicaState.TEMPORARY ||
             currentReplicaInfo.getState() == ReplicaState.RBW))) {
          throw new ReplicaAlreadyExistsException("Block " + b +
              " already exists in state " + currentReplicaInfo.getState() +
              " and thus cannot be created.");
        }
        lastFoundReplicaInfo = currentReplicaInfo;
      }
    }
    // Hung too long, just bail out. This is not supposed to happen.
    long writerStopMs = Time.monotonicNow() - startTimeMs;
    if (writerStopMs > writerStopTimeoutMs) {
      LOG.warn("Unable to stop existing writer for block " + b + " after " +
          writerStopMs + " milliseconds.");
      throw new IOException("Unable to stop existing writer for block " + b +
          " after " + writerStopMs + " milliseconds.");
    }
    // Stop the previous writer
    ((ReplicaInPipeline) lastFoundReplicaInfo).stopWriter(writerStopTimeoutMs);
  } while (true);
}
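The writerStopTimeoutMs bound comes from the DataNode's xceiver-stop timeout (DnConf#getXceiverStopTimeout); if the existing writer cannot be stopped within that window, createTemporary gives up rather than looping forever. As a rough illustration, that timeout is tuned through DataNode configuration; the key name and default below are assumptions from memory, not verified against this Hadoop version:

// Assumed configuration knob backing DnConf#getXceiverStopTimeout():
// dfs.datanode.xceiver.stop.timeout.millis (believed to default to 60000 ms).
Configuration conf = new HdfsConfiguration();
conf.setLong("dfs.datanode.xceiver.stop.timeout.millis", 60_000L);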
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaHandler in project hadoop by apache: class TestFileAppend, method testConcurrentAppendRead.
@Test(timeout = 10000)
public void testConcurrentAppendRead()
    throws IOException, TimeoutException, InterruptedException {
  // Create a finalized replica and append to it.
  // Read block data and checksum. Verify checksum.
  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
  conf.setInt("dfs.min.replication", 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitActive();
    DataNode dn = cluster.getDataNodes().get(0);
    FsDatasetSpi<?> dataSet = DataNodeTestUtils.getFSDataset(dn);
    // create a file with 1 byte of data.
    long initialFileLength = 1;
    DistributedFileSystem fs = cluster.getFileSystem();
    Path fileName = new Path("/appendCorruptBlock");
    DFSTestUtil.createFile(fs, fileName, initialFileLength, (short) 1, 0);
    DFSTestUtil.waitReplication(fs, fileName, (short) 1);
    Assert.assertTrue("File not created", fs.exists(fileName));
    // Call FsDatasetImpl#append to append the block file,
    // which converts it to an RBW replica.
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    long newGS = block.getGenerationStamp() + 1;
    ReplicaHandler replicaHandler = dataSet.append(block, newGS, initialFileLength);
    // write data to the block file
    ReplicaBeingWritten rbw = (ReplicaBeingWritten) replicaHandler.getReplica();
    ReplicaOutputStreams outputStreams = rbw.createStreams(false, DEFAULT_CHECKSUM);
    OutputStream dataOutput = outputStreams.getDataOut();
    byte[] appendBytes = new byte[1];
    dataOutput.write(appendBytes, 0, 1);
    dataOutput.flush();
    dataOutput.close();
    // update the checksum file
    final int smallBufferSize = DFSUtilClient.getSmallBufferSize(conf);
    FsDatasetUtil.computeChecksum(rbw.getMetaFile(), rbw.getMetaFile(),
        rbw.getBlockFile(), smallBufferSize, conf);
    // Read the block. The DataNode BlockSender should read from the RBW
    // replica's in-memory checksum, rather than the on-disk checksum.
    // Otherwise it will see a checksum mismatch error.
    final byte[] readBlock = DFSTestUtil.readFileBuffer(fs, fileName);
    assertEquals("should have read only one byte!", 1, readBlock.length);
  } finally {
    cluster.shutdown();
  }
}
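DEFAULT_CHECKSUM is a constant defined elsewhere in TestFileAppend and not shown on this page. A plausible definition, assuming the standard org.apache.hadoop.util.DataChecksum factory (the checksum type and bytes-per-checksum here are illustrative guesses, not the test's actual values):

// Assumed shape of the test's checksum constant used by rbw.createStreams().
private static final DataChecksum DEFAULT_CHECKSUM =
    DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);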