Use of org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline in project hadoop by apache.
The class FsDatasetImpl, method append.
/** Append to a finalized replica.
 * Change a finalized replica to an RBW replica and
 * bump its generation stamp to be the newGS.
 *
 * @param bpid block pool id
 * @param replicaInfo a finalized replica
 * @param newGS new generation stamp
 * @param estimateBlockLen estimated block length
 * @return an RBW replica
 * @throws IOException if moving the replica from the finalized directory
 *           to the rbw directory fails
 */
private ReplicaInPipeline append(String bpid, ReplicaInfo replicaInfo,
    long newGS, long estimateBlockLen) throws IOException {
  try (AutoCloseableLock lock = datasetLock.acquire()) {
    if (replicaInfo.getState() != ReplicaState.FINALIZED) {
      throw new IOException("Only a Finalized replica can be appended to; "
          + "Replica with blk id " + replicaInfo.getBlockId()
          + " has state " + replicaInfo.getState());
    }
    // If the block is cached, start uncaching it.
    cacheManager.uncacheBlock(bpid, replicaInfo.getBlockId());
    // If there are any hardlinks to the block, break them. This ensures
    // we are not appending to a file that is part of a previous/ directory.
    replicaInfo.breakHardLinksIfNeeded();
    FsVolumeImpl v = (FsVolumeImpl) replicaInfo.getVolume();
    ReplicaInPipeline rip = v.append(bpid, replicaInfo, newGS, estimateBlockLen);
    if (rip.getReplicaInfo().getState() != ReplicaState.RBW) {
      throw new IOException("Append on block " + replicaInfo.getBlockId()
          + " returned a replica of state " + rip.getReplicaInfo().getState()
          + "; expected RBW");
    }
    // Replace the finalized replica with the RBW replica in the replica map.
    volumeMap.add(bpid, rip.getReplicaInfo());
    return rip;
  }
}
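The FsDatasetImpl snippets on this page guard the replica map with the same try-with-resources idiom around datasetLock.acquire(). Below is a minimal, self-contained sketch of that idiom built only on JDK classes; CloseableLock and LockIdiomDemo are illustrative stand-ins, not Hadoop's AutoCloseableLock.

import java.util.concurrent.locks.ReentrantLock;

// Illustrative stand-in for an auto-closeable lock: acquire() takes the
// lock and returns the object itself, so try-with-resources releases it.
class CloseableLock implements AutoCloseable {
  private final ReentrantLock lock = new ReentrantLock();

  CloseableLock acquire() {
    lock.lock();
    return this;
  }

  @Override
  public void close() {
    lock.unlock();
  }
}

class LockIdiomDemo {
  private final CloseableLock datasetLock = new CloseableLock();
  private long protectedCounter = 0;

  void update() {
    // The lock is released when the try block exits, whether normally
    // or via an exception.
    try (CloseableLock l = datasetLock.acquire()) {
      protectedCounter++;
    }
  }
}

Because acquire() returns the lock object itself, the try-with-resources block releases it on every exit path, including the IOExceptions thrown in the append method above.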
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline in project hadoop by apache.
The class FsDatasetImpl, method stopAllDataxceiverThreads.
void stopAllDataxceiverThreads(FsVolumeImpl volume) {
  try (AutoCloseableLock lock = datasetLock.acquire()) {
    for (String blockPoolId : volumeMap.getBlockPoolList()) {
      Collection<ReplicaInfo> replicas = volumeMap.replicas(blockPoolId);
      for (ReplicaInfo replicaInfo : replicas) {
        // Only replicas that are still being written (TEMPORARY or RBW)
        // have a writer thread to interrupt.
        if ((replicaInfo.getState() == ReplicaState.TEMPORARY
            || replicaInfo.getState() == ReplicaState.RBW)
            && replicaInfo.getVolume().equals(volume)) {
          ReplicaInPipeline replicaInPipeline = (ReplicaInPipeline) replicaInfo;
          replicaInPipeline.interruptThread();
        }
      }
    }
  }
}
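The method above is a filter-and-interrupt sweep over the replica map: replicas in a writing state on the given volume are cast to ReplicaInPipeline so their writer threads can be interrupted. A simplified, self-contained model of the same pattern follows; every class and field name in it is illustrative, not part of Hadoop.

import java.util.List;

enum State { TEMPORARY, RBW, FINALIZED }

// Illustrative replica model: only in-flight replicas hold a writer thread.
class Replica {
  final State state;
  final String volumeId;
  final Thread writer; // null for FINALIZED replicas

  Replica(State state, String volumeId, Thread writer) {
    this.state = state;
    this.volumeId = volumeId;
    this.writer = writer;
  }
}

class WriterSweep {
  // Interrupt the writer of every in-flight replica on the given volume.
  static void stopWritersOnVolume(List<Replica> replicas, String volumeId) {
    for (Replica r : replicas) {
      boolean inFlight = r.state == State.TEMPORARY || r.state == State.RBW;
      if (inFlight && r.volumeId.equals(volumeId) && r.writer != null) {
        r.writer.interrupt();
      }
    }
  }
}

The volume equality check keeps the sweep scoped to one volume, so writers on other volumes are left untouched.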
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline in project hadoop by apache.
The class FsDatasetImpl, method initReplicaRecoveryImpl.
static ReplicaRecoveryInfo initReplicaRecoveryImpl(String bpid, ReplicaMap map,
    Block block, long recoveryId) throws IOException, MustStopExistingWriter {
  final ReplicaInfo replica = map.get(bpid, block.getBlockId());
  LOG.info("initReplicaRecovery: " + block + ", recoveryId=" + recoveryId
      + ", replica=" + replica);
  // check replica
  if (replica == null) {
    return null;
  }
  // stop the writer if there is any
  if (replica.getState() == ReplicaState.TEMPORARY
      || replica.getState() == ReplicaState.RBW) {
    final ReplicaInPipeline rip = (ReplicaInPipeline) replica;
    if (!rip.attemptToSetWriter(null, Thread.currentThread())) {
      throw new MustStopExistingWriter(rip);
    }
    // check replica bytes on disk
    if (replica.getBytesOnDisk() < replica.getVisibleLength()) {
      throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
          + " getBytesOnDisk() < getVisibleLength(), rip=" + replica);
    }
    // check the replica's files
    checkReplicaFiles(replica);
  }
  // check generation stamp
  if (replica.getGenerationStamp() < block.getGenerationStamp()) {
    throw new IOException(
        "replica.getGenerationStamp() < block.getGenerationStamp(), block="
        + block + ", replica=" + replica);
  }
  // check recovery id
  if (replica.getGenerationStamp() >= recoveryId) {
    throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
        + " replica.getGenerationStamp() >= recoveryId = " + recoveryId
        + ", block=" + block + ", replica=" + replica);
  }
  // check RUR (replica under recovery)
  final ReplicaInfo rur;
  if (replica.getState() == ReplicaState.RUR) {
    rur = replica;
    if (rur.getRecoveryID() >= recoveryId) {
      throw new RecoveryInProgressException("rur.getRecoveryID() >= recoveryId = "
          + recoveryId + ", block=" + block + ", rur=" + rur);
    }
    final long oldRecoveryID = rur.getRecoveryID();
    rur.setRecoveryID(recoveryId);
    LOG.info("initReplicaRecovery: update recovery id for " + block
        + " from " + oldRecoveryID + " to " + recoveryId);
  } else {
    rur = new ReplicaBuilder(ReplicaState.RUR)
        .from(replica)
        .setRecoveryId(recoveryId)
        .build();
    map.add(bpid, rur);
    LOG.info("initReplicaRecovery: changing replica state for " + block
        + " from " + replica.getState() + " to " + rur.getState());
  }
  return rur.createInfo();
}
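Note that initReplicaRecoveryImpl does not stop a live writer itself; it throws MustStopExistingWriter so the caller can stop that writer outside the attempt and retry. A hedged, self-contained sketch of that caller-side retry shape follows; WriterBusyException, RecoveryAttempt and their members are illustrative names, not the actual FsDatasetImpl API.

// Illustrative checked exception carrying the writer thread to be stopped.
class WriterBusyException extends Exception {
  final Thread writer;

  WriterBusyException(Thread writer) {
    this.writer = writer;
  }
}

class RecoveryAttempt {
  // Non-null while a writer thread is attached to the replica.
  private final Thread activeWriter;

  RecoveryAttempt(Thread activeWriter) {
    this.activeWriter = activeWriter;
  }

  // One attempt: refuse to proceed while a writer is still running,
  // mirroring the MustStopExistingWriter signal above.
  private String tryRecoverOnce() throws WriterBusyException {
    if (activeWriter != null && activeWriter.isAlive()) {
      throw new WriterBusyException(activeWriter);
    }
    return "recovery-info";
  }

  // Caller-side loop: stop the conflicting writer outside the attempt,
  // then retry until the attempt succeeds.
  String recover() throws InterruptedException {
    while (true) {
      try {
        return tryRecoverOnce();
      } catch (WriterBusyException e) {
        e.writer.interrupt(); // ask the writer to stop
        e.writer.join(1000);  // wait briefly for it to exit
      }
    }
  }
}

A likely motivation for this shape is that stopping a writer can block, so throwing and retrying keeps that wait out of the recovery attempt itself.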
Use of org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline in project hadoop by apache.
The class TestClientProtocolForPipelineRecovery, method testPipelineRecoveryWithTransferBlock.
// Test to verify that blocks are no longer corrupted after HDFS-4660.
// If HDFS-4660 and the related fixes (HDFS-9220, HDFS-8722) were reverted,
// this test would fail.
// Scenario: prior to the fix, a block got corrupted when transferBlock
// happened during pipeline recovery with extra bytes making up the end of
// a chunk.
// For verification, the pipeline needs to fail at the last datanode while
// the second datanode has more bytes on disk than have been acked.
// This lets extra bytes be transferred to the newNode to make up the
// end of the chunk during pipeline recovery, and is achieved by the
// customized DataNodeFaultInjector class in this test.
// For detailed info, please refer to HDFS-4660 and HDFS-10587. HDFS-9220
// fixes an issue in the HDFS-4660 patch, and HDFS-8722 is an optimization.
@Test
public void testPipelineRecoveryWithTransferBlock() throws Exception {
  final int chunkSize = 512;
  final int oneWriteSize = 5000;
  final int totalSize = 1024 * 1024;
  final int errorInjectionPos = 512;
  Configuration conf = new HdfsConfiguration();
  // Need 4 datanodes to verify the replaceDatanode during pipeline recovery
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  DataNodeFaultInjector old = DataNodeFaultInjector.get();
  try {
    DistributedFileSystem fs = cluster.getFileSystem();
    Path fileName = new Path("/f");
    FSDataOutputStream o = fs.create(fileName);
    int count = 0;
    // Flush to get the pipeline created.
    o.writeBytes("hello");
    o.hflush();
    DFSOutputStream dfsO = (DFSOutputStream) o.getWrappedStream();
    final DatanodeInfo[] pipeline = dfsO.getStreamer().getNodes();
    final String lastDn = pipeline[2].getXferAddr(false);
    final AtomicBoolean failed = new AtomicBoolean(false);
    DataNodeFaultInjector.set(new DataNodeFaultInjector() {
      @Override
      public void failPipeline(ReplicaInPipeline replicaInfo,
          String mirror) throws IOException {
        if (!lastDn.equals(mirror)) {
          // Only fail for the second DN
          return;
        }
        if (!failed.get()
            && (replicaInfo.getBytesAcked() > errorInjectionPos)
            && (replicaInfo.getBytesAcked() % chunkSize != 0)) {
          int count = 0;
          while (count < 10) {
            // Wait until bytesOnDisk reaches past the end of the chunk that
            // bytesAcked is in; that is when the block corruption
            // described in HDFS-4660 would occur.
            if ((replicaInfo.getBytesOnDisk() / chunkSize)
                - (replicaInfo.getBytesAcked() / chunkSize) >= 1) {
              failed.set(true);
              throw new IOException("Failing Pipeline "
                  + replicaInfo.getBytesAcked() + " : "
                  + replicaInfo.getBytesOnDisk());
            }
            try {
              Thread.sleep(200);
            } catch (InterruptedException e) {
              // ignore and keep polling
            }
            count++;
          }
        }
      }
    });
    Random r = new Random();
    byte[] b = new byte[oneWriteSize];
    while (count < totalSize) {
      r.nextBytes(b);
      o.write(b);
      count += oneWriteSize;
      o.hflush();
    }
    assertTrue("Expected a failure in the pipeline", failed.get());
    DatanodeInfo[] newNodes = dfsO.getStreamer().getNodes();
    o.close();
    // Trigger block reports to the NN.
    for (DataNode d : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerBlockReport(d);
    }
    // Read from the replaced datanode to check for corruption, so shut down
    // all other nodes in the pipeline.
    List<DatanodeInfo> pipelineList = Arrays.asList(pipeline);
    DatanodeInfo newNode = null;
    for (DatanodeInfo node : newNodes) {
      if (!pipelineList.contains(node)) {
        newNode = node;
        break;
      }
    }
    LOG.info("Number of nodes in pipeline: {} newNode {}",
        newNodes.length, newNode.getName());
    // Shut down the two old nodes.
    for (int i = 0; i < newNodes.length; i++) {
      if (newNodes[i].getName().equals(newNode.getName())) {
        continue;
      }
      LOG.info("shutdown {}", newNodes[i].getName());
      cluster.stopDataNode(newNodes[i].getName());
    }
    // The read should succeed from the newNode alone; no corruption should
    // be reported.
    DFSTestUtil.readFile(fs, fileName);
  } finally {
    DataNodeFaultInjector.set(old);
    cluster.shutdown();
  }
}