use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.
the class TestDataNodeHotSwapVolumes method testRemoveVolumeBeingWrittenForDatanode.
/**
* Test removing a data volume from a particular DataNode while the
* volume is actively being written to.
* @param dataNodeIdx the index of the DataNode from which to remove a volume.
*/
private void testRemoveVolumeBeingWrittenForDatanode(int dataNodeIdx) throws IOException, ReconfigurationException, TimeoutException, InterruptedException, BrokenBarrierException {
// Starts DFS cluster with 3 DataNodes to form a pipeline.
startDFSCluster(1, 3);
final short REPLICATION = 3;
final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
final FileSystem fs = cluster.getFileSystem();
final Path testFile = new Path("/test");
FSDataOutputStream out = fs.create(testFile, REPLICATION);
Random rb = new Random(0);
// Write the first half of the block.
byte[] writeBuf = new byte[BLOCK_SIZE / 2];
rb.nextBytes(writeBuf);
out.write(writeBuf);
out.hflush();
// Make FsDatasetSpi#finalizeBlock a time-consuming operation, so that if the
// BlockReceiver were to release the volume reference before finalizeBlock(),
// the blocks on the volume would be removed and finalizeBlock() would throw an IOException.
final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
dn.data = Mockito.spy(data);
doAnswer(new Answer<Object>() {
public Object answer(InvocationOnMock invocation) throws IOException, InterruptedException {
Thread.sleep(1000);
// Bypass the argument to FsDatasetImpl#finalizeBlock to verify that
// the block is not removed, since the volume reference should not
// be released at this point.
data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
return null;
}
}).when(dn.data).finalizeBlock(any(ExtendedBlock.class));
final CyclicBarrier barrier = new CyclicBarrier(2);
List<String> oldDirs = getDataDirs(dn);
// Remove the first volume by reconfiguring the DataNode with only the second data directory.
final String newDirs = oldDirs.get(1);
final List<Exception> exceptions = new ArrayList<>();
Thread reconfigThread = new Thread() {
public void run() {
try {
barrier.await();
assertThat("DN did not update its own config", dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDirs), is(dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY)));
} catch (ReconfigurationException | InterruptedException | BrokenBarrierException e) {
exceptions.add(e);
}
}
};
reconfigThread.start();
barrier.await();
rb.nextBytes(writeBuf);
out.write(writeBuf);
out.hflush();
out.close();
reconfigThread.join();
// Verify that the data directory reconfiguration was successful.
FsDatasetSpi<? extends FsVolumeSpi> fsDatasetSpi = dn.getFSDataset();
try (FsDatasetSpi.FsVolumeReferences fsVolumeReferences = fsDatasetSpi.getFsVolumeReferences()) {
for (int i = 0; i < fsVolumeReferences.size(); i++) {
System.out.println("Vol: " + fsVolumeReferences.get(i).getBaseURI().toString());
}
assertEquals("Volume remove wasn't successful.", 1, fsVolumeReferences.size());
}
// Verify that the file still has sufficient replication.
DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
// Read the content back
byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
assertEquals(BLOCK_SIZE, content.length);
if (!exceptions.isEmpty()) {
throw new IOException(exceptions.get(0).getCause());
}
}
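In the test class, this private helper is driven from a public JUnit test. The sketch below shows one plausible driver; the test name, timeout, and DataNode index are illustrative assumptions rather than part of the snippet above.
@Test(timeout = 180000)
public void testRemoveVolumeBeingWritten() throws IOException, ReconfigurationException, TimeoutException, InterruptedException, BrokenBarrierException {
    // Remove a volume from the second DataNode in the pipeline while a block is in flight.
    testRemoveVolumeBeingWrittenForDatanode(1);
}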
use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.
the class TestDataNodeVolumeFailure method testFailedVolumeBeingRemovedFromDataNode.
/**
* Test that DataStorage and BlockPoolSliceStorage remove the failed volume
* after failure.
*/
@Test(timeout = 150000)
public void testFailedVolumeBeingRemovedFromDataNode() throws Exception {
// This test uses DataNodeTestUtils#injectDataDirFailure() to simulate
// volume failures, which is currently not supported on Windows.
assumeNotWindows();
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L);
DFSTestUtil.waitReplication(fs, file1, (short) 2);
File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
DataNode dn0 = cluster.getDataNodes().get(0);
DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol1));
// Verify dn0Vol1 has been completely removed from DN0.
// 1. dn0Vol1 is removed from DataStorage.
DataStorage storage = dn0.getStorage();
assertEquals(1, storage.getNumStorageDirs());
for (int i = 0; i < storage.getNumStorageDirs(); i++) {
Storage.StorageDirectory sd = storage.getStorageDir(i);
assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
}
final String bpid = cluster.getNamesystem().getBlockPoolId();
BlockPoolSliceStorage bpsStorage = storage.getBPStorage(bpid);
assertEquals(1, bpsStorage.getNumStorageDirs());
for (int i = 0; i < bpsStorage.getNumStorageDirs(); i++) {
Storage.StorageDirectory sd = bpsStorage.getStorageDir(i);
assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
}
// 2. dn0Vol1 is removed from FsDataset
FsDatasetSpi<? extends FsVolumeSpi> data = dn0.getFSDataset();
try (FsDatasetSpi.FsVolumeReferences vols = data.getFsVolumeReferences()) {
for (FsVolumeSpi volume : vols) {
assertFalse(new File(volume.getStorageLocation().getUri()).getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
}
}
// 3. all blocks on dn0Vol1 have been removed.
for (ReplicaInfo replica : FsDatasetTestUtil.getReplicas(data, bpid)) {
assertNotNull(replica.getVolume());
assertFalse(new File(replica.getVolume().getStorageLocation().getUri()).getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
}
// 4. dn0Vol1 is not in DN0's configuration and dataDirs anymore.
String[] dataDirStrs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
assertEquals(1, dataDirStrs.length);
assertFalse(dataDirStrs[0].contains(dn0Vol1.getAbsolutePath()));
}
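The directory arithmetic above ("data" + (2 * 0 + 1)) relies on MiniDFSCluster's default layout of two data directories per DataNode (data1 and data2 for the first DataNode, data3 and data4 for the second, and so on). The sketch below is a minimal guess at the fixture this test assumes; the actual setup lives in an omitted @Before method, so the configuration details are assumptions.
// Assumed fixture: a MiniDFSCluster with two DataNodes, each using the default two data
// directories, and a zero minimum disk-check gap so that waitForDiskError() observes the
// injected failure quickly.
Configuration conf = new HdfsConfiguration();
conf.setTimeDuration(DFSConfigKeys.DFS_DATANODE_DISK_CHECK_MIN_GAP_KEY, 0, TimeUnit.MILLISECONDS);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
File dataDir = new File(cluster.getDataDirectory());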
use of org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi in project hadoop by apache.
the class DatasetVolumeChecker method checkAllVolumes.
/**
* Run checks against all volumes of a dataset.
*
* This check may be performed at service startup and subsequently at
* regular intervals to detect and handle failed volumes.
*
* @param dataset - FsDatasetSpi to be checked.
* @return set of failed volumes.
*/
public Set<FsVolumeSpi> checkAllVolumes(final FsDatasetSpi<? extends FsVolumeSpi> dataset) throws InterruptedException {
final long gap = timer.monotonicNow() - lastAllVolumesCheck;
if (gap < minDiskCheckGapMs) {
numSkippedChecks.incrementAndGet();
LOG.trace("Skipped checking all volumes, time since last check {} is less " + "than the minimum gap between checks ({} ms).", gap, minDiskCheckGapMs);
return Collections.emptySet();
}
final FsDatasetSpi.FsVolumeReferences references = dataset.getFsVolumeReferences();
if (references.size() == 0) {
LOG.warn("checkAllVolumesAsync - no volumes can be referenced");
return Collections.emptySet();
}
lastAllVolumesCheck = timer.monotonicNow();
final Set<FsVolumeSpi> healthyVolumes = new HashSet<>();
final Set<FsVolumeSpi> failedVolumes = new HashSet<>();
final Set<FsVolumeSpi> allVolumes = new HashSet<>();
final AtomicLong numVolumes = new AtomicLong(references.size());
final CountDownLatch latch = new CountDownLatch(1);
for (int i = 0; i < references.size(); ++i) {
final FsVolumeReference reference = references.getReference(i);
Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(reference.getVolume(), IGNORED_CONTEXT);
LOG.info("Scheduled health check for volume {}", reference.getVolume());
if (olf.isPresent()) {
allVolumes.add(reference.getVolume());
Futures.addCallback(olf.get(), new ResultHandler(reference, healthyVolumes, failedVolumes, numVolumes, new Callback() {
@Override
public void call(Set<FsVolumeSpi> ignored1, Set<FsVolumeSpi> ignored2) {
latch.countDown();
}
}));
} else {
IOUtils.cleanup(null, reference);
if (numVolumes.decrementAndGet() == 0) {
latch.countDown();
}
}
}
// Wait until the timeout elapses, after which we give up on the remaining volumes.
if (!latch.await(maxAllowedTimeForCheckMs, TimeUnit.MILLISECONDS)) {
LOG.warn("checkAllVolumes timed out after {} ms" + maxAllowedTimeForCheckMs);
}
numSyncDatasetChecks.incrementAndGet();
synchronized (this) {
// Every volume not confirmed healthy is treated as failed. Copy under the lock, since Sets.difference() returns a view of a potentially changing set.
return new HashSet<>(Sets.difference(allVolumes, healthyVolumes));
}
}
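As a usage illustration, a caller runs this check synchronously and reacts to the returned set. The sketch below assumes an existing DataNode dn, its Configuration conf, and an SLF4J LOG; that surrounding wiring is an assumption, and only the DatasetVolumeChecker calls mirror the method above.
// Build a checker from the configuration and a monotonic timer, then run a blocking
// health check over every volume of the DataNode's dataset.
DatasetVolumeChecker checker = new DatasetVolumeChecker(conf, new Timer());
Set<FsVolumeSpi> failedVolumes = checker.checkAllVolumes(dn.getFSDataset());
for (FsVolumeSpi failed : failedVolumes) {
    LOG.warn("Volume {} failed its health check", failed);
}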