Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class TestBlockRecovery, method testStopWorker.
/**
 * Test that an FsDatasetImpl operation does not hold the lock for an
 * unreasonable amount of time if a writer is taking a long time to stop.
 */
private void testStopWorker(final TestStopWorkerRunnable tswr) throws Exception {
  LOG.debug("Running " + currentTestName.getMethodName());
  // We need a long value for the data xceiver stop timeout.
  // Otherwise the timeout will trigger, and we will not have tested that
  // thread join was done locklessly.
  Assert.assertEquals(TEST_STOP_WORKER_XCEIVER_STOP_TIMEOUT_MILLIS,
      dn.getDnConf().getXceiverStopTimeout());
  final TestStopWorkerSemaphore progressParent = new TestStopWorkerSemaphore();
  final TestStopWorkerSemaphore terminateSlowWriter = new TestStopWorkerSemaphore();
  final AtomicReference<String> failure = new AtomicReference<String>(null);
  Collection<RecoveringBlock> recoveringBlocks = initRecoveringBlocks();
  final RecoveringBlock recoveringBlock =
      Iterators.get(recoveringBlocks.iterator(), 0);
  final ExtendedBlock block = recoveringBlock.getBlock();
  Thread slowWriterThread = new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        // Register this thread as the writer for the recoveringBlock.
        LOG.debug("slowWriter creating rbw");
        ReplicaHandler replicaHandler =
            spyDN.data.createRbw(StorageType.DISK, block, false);
        replicaHandler.close();
        LOG.debug("slowWriter created rbw");
        // Tell the parent thread to start progressing.
        progressParent.sem.release();
        terminateSlowWriter.uninterruptiblyAcquire(60000);
        LOG.debug("slowWriter exiting");
      } catch (Throwable t) {
        LOG.error("slowWriter got exception", t);
        failure.compareAndSet(null, "slowWriter got exception " + t.getMessage());
      }
    }
  });
  // Start the slow worker thread and wait for it to take ownership of the
  // ReplicaInPipeline
  slowWriterThread.start();
  progressParent.uninterruptiblyAcquire(60000);
  // Start a worker thread which will attempt to stop the writer.
  Thread stopWriterThread = new Thread(new Runnable() {
    @Override
    public void run() {
      try {
        LOG.debug("initiating " + tswr.opName());
        tswr.run(recoveringBlock);
        LOG.debug("finished " + tswr.opName());
      } catch (Throwable t) {
        LOG.error("stopWriterThread got unexpected exception for " + tswr.opName(), t);
        failure.compareAndSet(null, "stopWriterThread got unexpected exception for "
            + tswr.opName() + ": " + t.getMessage());
      }
    }
  });
  stopWriterThread.start();
  while (!terminateSlowWriter.gotInterruption.get()) {
    // Wait until stopWriterThread attempts to stop our slow writer by sending
    // it an InterruptedException.
    Thread.sleep(1);
  }
  // We know that stopWriterThread is in the process of joining our slow
  // writer. It must not hold the lock during this operation.
  // In order to test that it does not, we attempt to do an operation that
  // requires the lock-- getReplicaString.
  spyDN.getFSDataset().getReplicaString(
      recoveringBlock.getBlock().getBlockPoolId(),
      recoveringBlock.getBlock().getBlockId());
  // Tell the slow writer to exit, and then wait for all threads to join.
  terminateSlowWriter.sem.release();
  slowWriterThread.join();
  stopWriterThread.join();
  // Check that our worker threads exited cleanly. This is not checked by the
  // unit test framework, so we have to do it manually here.
  String failureReason = failure.get();
  if (failureReason != null) {
    Assert.fail("Thread failure: " + failureReason);
  }
}
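The TestStopWorkerSemaphore and TestStopWorkerRunnable helpers used above are defined elsewhere in TestBlockRecovery and are not shown on this page. As a rough guide to how the test reads, here is a minimal sketch of what TestStopWorkerSemaphore could look like, assuming it wraps a java.util.concurrent.Semaphore and records whether an acquire attempt was interrupted; this is an illustrative reconstruction, not the actual Hadoop source.

// Illustrative sketch only; the real helper in TestBlockRecovery may differ.
// Assumes java.util.concurrent.Semaphore, java.util.concurrent.TimeUnit and
// java.util.concurrent.atomic.AtomicBoolean are imported.
private static class TestStopWorkerSemaphore {
  final Semaphore sem = new Semaphore(0);
  // Flips to true once an acquire attempt has been interrupted.
  final AtomicBoolean gotInterruption = new AtomicBoolean(false);

  // Acquire a permit, retrying after interruption, for up to timeoutMs.
  void uninterruptiblyAcquire(long timeoutMs) {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (true) {
      long remaining = deadline - System.currentTimeMillis();
      if (remaining <= 0) {
        throw new RuntimeException("Timed out waiting for semaphore");
      }
      try {
        if (sem.tryAcquire(remaining, TimeUnit.MILLISECONDS)) {
          return;
        }
      } catch (InterruptedException e) {
        gotInterruption.set(true);
      }
    }
  }
}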
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class SimulatedFSDataset, method initReplicaRecovery.
@Override
public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock) throws IOException {
  ExtendedBlock b = rBlock.getBlock();
  final Map<Block, BInfo> map = getMap(b.getBlockPoolId());
  BInfo binfo = map.get(b.getLocalBlock());
  if (binfo == null) {
    throw new IOException("No such Block " + b);
  }
  return new ReplicaRecoveryInfo(binfo.getBlockId(), binfo.getBytesOnDisk(),
      binfo.getGenerationStamp(), binfo.isFinalized() ? ReplicaState.FINALIZED : ReplicaState.RBW);
}
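The lookup above works because an ExtendedBlock pairs a block pool id with a pool-local Block. A small illustrative sketch of that decomposition, using made-up values:

// Values are hypothetical, for illustration only.
ExtendedBlock eb = new ExtendedBlock("BP-1234-127.0.0.1-42", 100L, 512L, 1001L);
String bpid = eb.getBlockPoolId();      // selects the per-pool map via getMap(bpid)
Block localBlock = eb.getLocalBlock();  // the pool-local key used for map.get(...)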
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class SimulatedFSDataset, method convertTemporaryToRbw.
@Override
public ReplicaInPipeline convertTemporaryToRbw(ExtendedBlock temporary) throws IOException {
  final Map<Block, BInfo> map = blockMap.get(temporary.getBlockPoolId());
  if (map == null) {
    throw new IOException("Block pool not found, temporary=" + temporary);
  }
  final BInfo r = map.get(temporary.getLocalBlock());
  if (r == null) {
    throw new IOException("Block not found, temporary=" + temporary);
  } else if (r.isFinalized()) {
    throw new IOException("Replica already finalized, temporary=" + temporary + ", r=" + r);
  }
  return r;
}
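For context, a hedged sketch of how a caller might wrap this conversion; the dataset and LOG handles here are assumptions for the example, not part of the snippet above.

// Illustrative only: convert a temporary replica to RBW and surface the
// failure modes checked by the method above (unknown pool, unknown block,
// or an already-finalized replica).
try {
  ReplicaInPipeline rip = dataset.convertTemporaryToRbw(temporary);
  // rip now represents the replica being written by the pipeline.
} catch (IOException e) {
  LOG.warn("convertTemporaryToRbw failed for " + temporary, e);
  throw e;
}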
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class SimulatedFSDataset, method injectBlocks.
public synchronized void injectBlocks(String bpid, Iterable<? extends Block> injectBlocks) throws IOException {
  ExtendedBlock blk = new ExtendedBlock();
  if (injectBlocks != null) {
    for (Block b : injectBlocks) {
      // If any block in the list is bad, reject the whole list.
      if (b == null) {
        throw new NullPointerException("Null blocks in block list");
      }
      blk.set(bpid, b);
      if (isValidBlock(blk)) {
        throw new IOException("Block already exists in block list");
      }
    }
    Map<Block, BInfo> map = blockMap.get(bpid);
    if (map == null) {
      map = new TreeMap<>();
      blockMap.put(bpid, map);
    }
    for (Block b : injectBlocks) {
      BInfo binfo = new BInfo(bpid, b, false);
      map.put(binfo.theBlock, binfo);
    }
  }
}
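A short hedged sketch of how a test might drive injectBlocks, assuming an existing SimulatedFSDataset instance named sfsd and an already-registered block pool id; the names and values are illustrative.

// Illustrative only: inject two pool-local blocks into the simulated dataset.
// Block(blockId, numBytes, generationStamp) is the pool-local identity.
String bpid = "BP-test-pool";
List<Block> blocks = Arrays.asList(
    new Block(1L, 1024L, 2001L),
    new Block(2L, 2048L, 2001L));
// Throws if any entry is null or if a block already exists in the dataset.
sfsd.injectBlocks(bpid, blocks);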
Use of org.apache.hadoop.hdfs.protocol.ExtendedBlock in project hadoop by apache.
The class TestUnderReplicatedBlocks, method testSetrepIncWithUnderReplicatedBlocks.
// 1 min timeout
@Test(timeout = 60000)
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new HdfsConfiguration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR + 1).build();
  try {
    // Create a file with one block with a replication factor of 2.
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    // Remove one replica from the blocksMap so the block becomes under-replicated,
    // but the block does not get put into the under-replicated blocks queue.
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = bm.blocksMap.getStorages(b.getLocalBlock())
        .iterator().next().getDatanodeDescriptor();
    bm.addToInvalidates(b.getLocalBlock(), dn);
    // Compute the invalidate work on the NameNode, and trigger the heartbeat from the DataNode.
    BlockManagerTestUtil.computeAllPendingWork(bm);
    DataNodeTestUtils.triggerHeartbeat(cluster.getDataNode(dn.getIpcPort()));
    // Wait to make sure the DataNode receives the deletion request.
    Thread.sleep(5000);
    // Remove the record from blocksMap.
    bm.blocksMap.removeNode(b.getLocalBlock(), dn);
    // Increment this file's replication factor.
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[] {
        "-setrep", "-w", Integer.toString(1 + REPLICATION_FACTOR), FILE_NAME }));
  } finally {
    cluster.shutdown();
  }
}
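The fixed Thread.sleep(5000) above simply gives the DataNode time to act on the deletion. A hedged alternative is to poll with GenericTestUtils.waitFor instead of sleeping for a fixed interval; the condition below (no pending deletion blocks left in the BlockManager) is an assumption made for illustration, not the check used by the real test.

// Illustrative only: poll every 100 ms, for at most 5 seconds, instead of a
// fixed sleep. The condition is an assumed proxy for "the deletion has been
// processed" and may need adjusting.
GenericTestUtils.waitFor(new Supplier<Boolean>() {
  @Override
  public Boolean get() {
    return bm.getPendingDeletionBlocksCount() == 0;
  }
}, 100, 5000);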