Use of java.util.concurrent.Semaphore in project hadoop by apache.
The class TestBlockScanner, method testAppendWhileScanning.
/**
 * Test concurrent append and scan.
 * @throws Exception
 */
@Test(timeout = 120000)
public void testAppendWhileScanning() throws Exception {
  GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL);
  Configuration conf = new Configuration();
  // Throttle the block scanner: 1 MB per second.
  conf.setLong(DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND, 1048576);
  // Set a really long scan period.
  conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
      TestScanResultHandler.class.getName());
  conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
  final int numExpectedFiles = 1;
  final int numExpectedBlocks = 1;
  final int numNameServices = 1;
  // The initial file length cannot be too small. Otherwise the checksum
  // file stream buffer will be pre-filled and BlockSender will not see
  // the updated checksum.
  final int initialFileLength = 2 * 1024 * 1024 + 100;
  final TestContext ctx = new TestContext(conf, numNameServices);
  // Create one file, with one block.
  ctx.createFiles(0, numExpectedFiles, initialFileLength);
  final TestScanResultHandler.Info info =
      TestScanResultHandler.getInfo(ctx.volumes.get(0));
  String storageID = ctx.volumes.get(0).getStorageID();
  synchronized (info) {
    info.sem = new Semaphore(numExpectedBlocks * 2);
    info.shouldRun = true;
    info.notify();
  }
  // The VolumeScanner scans the first block when the DN starts.
  // Due to the throttler, this should take approximately 2 seconds.
  waitForRescan(info, numExpectedBlocks);
  // Update the throttler to schedule a rescan immediately. This number
  // must be larger than the initial file length; otherwise the throttler
  // prevents an immediate rescan.
  conf.setLong(DFS_BLOCK_SCANNER_VOLUME_BYTES_PER_SECOND,
      initialFileLength + 32 * 1024);
  BlockScanner.Conf newConf = new BlockScanner.Conf(conf);
  ctx.datanode.getBlockScanner().setConf(newConf);
  // Schedule the first block for scanning.
  ExtendedBlock first = ctx.getFileBlock(0, 0);
  ctx.datanode.getBlockScanner().markSuspectBlock(storageID, first);
  // Append to the file before the VolumeScanner completes scanning the
  // block, which takes approximately 2 seconds.
  FileSystem fs = ctx.cluster.getFileSystem();
  FSDataOutputStream os = fs.append(ctx.getPath(0));
  long seed = -1;
  int size = 200;
  final byte[] bytes = AppendTestUtil.randomBytes(seed, size);
  os.write(bytes);
  os.hflush();
  os.close();
  fs.close();
  // Verify that the volume scanner does not find bad blocks after the append.
  waitForRescan(info, numExpectedBlocks);
  GenericTestUtils.setLogLevel(DataNode.LOG, Level.INFO);
}
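The helper waitForRescan is not shown in this snippet, but the pattern the test relies on is visible: the TestScanResultHandler acquires one semaphore permit per scanned block, so the test controls scanning progress by handing out permits and then polling a counter. A minimal, self-contained sketch of that permit-gated worker pattern (the class and names here are illustrative, not Hadoop APIs):

import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;

public class PermitGatedWorker {

    static final Semaphore permits = new Semaphore(0);
    static final AtomicInteger itemsProcessed = new AtomicInteger();

    public static void main(String[] args) throws InterruptedException {
        // The worker blocks on the semaphore before each unit of work,
        // just as the scan-result handler does before each block scan.
        Thread worker = new Thread(() -> {
            while (!Thread.currentThread().isInterrupted()) {
                try {
                    permits.acquire();
                } catch (InterruptedException e) {
                    return;
                }
                itemsProcessed.incrementAndGet();
            }
        });
        worker.start();
        // The controller releases exactly 2 permits, then polls until both
        // units of work are done -- the shape of waitForRescan in the test.
        permits.release(2);
        while (itemsProcessed.get() < 2) {
            Thread.sleep(50);
        }
        System.out.println("processed = " + itemsProcessed.get());
        worker.interrupt();
    }
}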
Use of java.util.concurrent.Semaphore in project hadoop by apache.
The class TestBlockScanner, method testIgnoreMisplacedBlock.
/**
 * Test that blocks which are in the wrong location are ignored.
 */
@Test(timeout = 120000)
public void testIgnoreMisplacedBlock() throws Exception {
  Configuration conf = new Configuration();
  // Set a really long scan period.
  conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
      TestScanResultHandler.class.getName());
  conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
  final TestContext ctx = new TestContext(conf, 1);
  final int NUM_FILES = 4;
  ctx.createFiles(0, NUM_FILES, 5);
  MaterializedReplica unreachableReplica = ctx.getMaterializedReplica(0, 1);
  ExtendedBlock unreachableBlock = ctx.getFileBlock(0, 1);
  unreachableReplica.makeUnreachable();
  final TestScanResultHandler.Info info =
      TestScanResultHandler.getInfo(ctx.volumes.get(0));
  String storageID = ctx.volumes.get(0).getStorageID();
  synchronized (info) {
    info.sem = new Semaphore(NUM_FILES);
    info.shouldRun = true;
    info.notify();
  }
  // Wait for the 3 reachable blocks to be scanned; the misplaced block
  // should be skipped.
  LOG.info("Waiting for the blocks to be scanned.");
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        if (info.blocksScanned >= NUM_FILES - 1) {
          LOG.info("info = {}. blocksScanned has now reached " +
              info.blocksScanned, info);
          return true;
        } else {
          LOG.info("info = {}. Waiting for blocksScanned to reach " +
              (NUM_FILES - 1), info);
          return false;
        }
      }
    }
  }, 50, 30000);
  // We should have scanned the 3 reachable blocks and ignored the
  // misplaced one.
  synchronized (info) {
    assertFalse(info.goodBlocks.contains(unreachableBlock));
    assertFalse(info.badBlocks.contains(unreachableBlock));
    assertEquals("Expected 3 good blocks.", 3, info.goodBlocks.size());
    info.goodBlocks.clear();
    assertEquals("Expected 3 blocksScanned", 3, info.blocksScanned);
    assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
    info.blocksScanned = 0;
  }
  info.sem.release(1);
}
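GenericTestUtils.waitFor polls a Supplier<Boolean> at a fixed interval until it returns true or a timeout elapses. If you do not have the Hadoop test utilities on hand, a rough stand-in looks like this (a sketch using java.util.function.Supplier, not the Hadoop implementation):

import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;

public final class WaitFor {

    // Poll 'check' every checkEveryMillis until it returns true,
    // or fail with TimeoutException after waitForMillis.
    public static void waitFor(Supplier<Boolean> check,
            int checkEveryMillis, int waitForMillis)
            throws TimeoutException, InterruptedException {
        long deadline = System.currentTimeMillis() + waitForMillis;
        while (System.currentTimeMillis() < deadline) {
            if (Boolean.TRUE.equals(check.get())) {
                return;
            }
            Thread.sleep(checkEveryMillis);
        }
        throw new TimeoutException("Condition not met after " +
                waitForMillis + " ms");
    }
}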
Use of java.util.concurrent.Semaphore in project hadoop by apache.
The class TestBlockScanner, method testDatanodeCursor.
/**
 * Test that we save the scan cursor when shutting down the datanode, and
 * restart scanning from there when the datanode is restarted.
 */
@Test(timeout = 120000)
public void testDatanodeCursor() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
      TestScanResultHandler.class.getName());
  conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
  final TestContext ctx = new TestContext(conf, 1);
  final int NUM_EXPECTED_BLOCKS = 10;
  ctx.createFiles(0, NUM_EXPECTED_BLOCKS, 1);
  final TestScanResultHandler.Info info =
      TestScanResultHandler.getInfo(ctx.volumes.get(0));
  synchronized (info) {
    info.sem = new Semaphore(5);
    info.shouldRun = true;
    info.notify();
  }
  // Scan the first 5 blocks.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        return info.blocksScanned == 5;
      }
    }
  }, 3, 30000);
  synchronized (info) {
    assertEquals(5, info.goodBlocks.size());
    assertEquals(5, info.blocksScanned);
    info.shouldRun = false;
  }
  ctx.datanode.shutdown();
  URI vURI = ctx.volumes.get(0).getStorageLocation().getUri();
  File cursorPath = new File(new File(new File(
      new File(vURI), "current"), ctx.bpids[0]), "scanner.cursor");
  assertTrue("Failed to find cursor save file in " +
      cursorPath.getAbsolutePath(), cursorPath.exists());
  Set<ExtendedBlock> prevGoodBlocks = new HashSet<ExtendedBlock>();
  synchronized (info) {
    info.sem = new Semaphore(4);
    prevGoodBlocks.addAll(info.goodBlocks);
    info.goodBlocks.clear();
  }
  // The block that we were scanning when we shut down the DN won't get
  // recorded. After restarting the datanode, we should scan the next
  // 4 blocks.
  ctx.cluster.restartDataNode(0);
  synchronized (info) {
    info.shouldRun = true;
    info.notify();
  }
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        if (info.blocksScanned != 9) {
          LOG.info("Waiting for blocksScanned to reach 9. It is at {}",
              info.blocksScanned);
        }
        return info.blocksScanned == 9;
      }
    }
  }, 3, 30000);
  synchronized (info) {
    assertEquals(4, info.goodBlocks.size());
    info.goodBlocks.addAll(prevGoodBlocks);
    assertEquals(9, info.goodBlocks.size());
    assertEquals(9, info.blocksScanned);
  }
  ctx.datanode.shutdown();
  // The scan period is much, much longer than the test time, so the
  // restarted datanode should not rescan any blocks on its own.
  synchronized (info) {
    info.sem = null;
    info.shouldRun = false;
    info.goodBlocks.clear();
  }
  ctx.cluster.restartDataNode(0);
  synchronized (info) {
    info.shouldRun = true;
    info.notify();
  }
  Thread.sleep(3000);
  synchronized (info) {
    assertTrue(info.goodBlocks.isEmpty());
  }
  ctx.close();
}
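Alongside the semaphore, these tests coordinate with the handler through a classic guarded-block handshake: a shouldRun flag mutated under the info lock, plus notify() to wake a handler that waits on the same monitor. A minimal sketch of that pattern in isolation (names are illustrative, not Hadoop code):

public class GuardedWorker {

    private final Object lock = new Object();
    private boolean shouldRun = false;

    // Worker side: wait on the monitor until shouldRun is flipped.
    public void awaitStart() throws InterruptedException {
        synchronized (lock) {
            while (!shouldRun) {
                lock.wait();
            }
        }
    }

    // Test side: flip the flag and wake the waiter, mirroring
    // "info.shouldRun = true; info.notify();" in the tests above.
    public void start() {
        synchronized (lock) {
            shouldRun = true;
            lock.notify();
        }
    }
}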
Use of java.util.concurrent.Semaphore in project hadoop by apache.
The class TestBlockScanner, method testMarkSuspectBlock.
/**
 * Test that we can mark certain blocks as suspect, and get them quickly
 * rescanned that way. See HDFS-7686 and HDFS-7548.
 */
@Test(timeout = 120000)
public void testMarkSuspectBlock() throws Exception {
  Configuration conf = new Configuration();
  // Set a really long scan period.
  conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
      TestScanResultHandler.class.getName());
  conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
  final TestContext ctx = new TestContext(conf, 1);
  final int NUM_EXPECTED_BLOCKS = 10;
  ctx.createFiles(0, NUM_EXPECTED_BLOCKS, 1);
  final TestScanResultHandler.Info info =
      TestScanResultHandler.getInfo(ctx.volumes.get(0));
  String storageID = ctx.volumes.get(0).getStorageID();
  synchronized (info) {
    info.sem = new Semaphore(4);
    info.shouldRun = true;
    info.notify();
  }
  // Scan the first 4 blocks.
  LOG.info("Waiting for the first 4 blocks to be scanned.");
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        if (info.blocksScanned >= 4) {
          LOG.info("info = {}. blocksScanned has now reached 4.", info);
          return true;
        } else {
          LOG.info("info = {}. Waiting for blocksScanned to reach 4.", info);
          return false;
        }
      }
    }
  }, 50, 30000);
  // We should have scanned 4 blocks.
  synchronized (info) {
    assertEquals("Expected 4 good blocks.", 4, info.goodBlocks.size());
    info.goodBlocks.clear();
    assertEquals("Expected 4 blocksScanned", 4, info.blocksScanned);
    assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
    info.blocksScanned = 0;
  }
  ExtendedBlock first = ctx.getFileBlock(0, 0);
  ctx.datanode.getBlockScanner().markSuspectBlock(storageID, first);
  // When we increment the semaphore, the TestScanResultHandler will finish
  // adding the block that it was scanning previously (the 5th block).
  // We increment the semaphore twice so that the handler will also get a
  // chance to see the suspect block which we just requested the
  // VolumeScanner to process.
  info.sem.release(2);
  LOG.info("Waiting for 2 more blocks to be scanned.");
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        if (info.blocksScanned >= 2) {
          LOG.info("info = {}. blocksScanned has now reached 2.", info);
          return true;
        } else {
          LOG.info("info = {}. Waiting for blocksScanned to reach 2.", info);
          return false;
        }
      }
    }
  }, 50, 30000);
  synchronized (info) {
    assertTrue("Expected block " + first + " to have been scanned.",
        info.goodBlocks.contains(first));
    assertEquals(2, info.goodBlocks.size());
    info.goodBlocks.clear();
    assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
    assertEquals(2, info.blocksScanned);
    info.blocksScanned = 0;
  }
  // Re-mark the same block as suspect.
  ctx.datanode.getBlockScanner().markSuspectBlock(storageID, first);
  info.sem.release(10);
  LOG.info("Waiting for 5 more blocks to be scanned.");
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      synchronized (info) {
        if (info.blocksScanned >= 5) {
          LOG.info("info = {}. blocksScanned has now reached 5.", info);
          return true;
        } else {
          LOG.info("info = {}. Waiting for blocksScanned to reach 5.", info);
          return false;
        }
      }
    }
  }, 50, 30000);
  synchronized (info) {
    assertEquals(5, info.goodBlocks.size());
    assertEquals(0, info.badBlocks.size());
    assertEquals(5, info.blocksScanned);
    // We should not have rescanned the "suspect block", because it was
    // recently rescanned by the suspect block system. This is a test of
    // the "suspect block" rate limiting.
    Assert.assertFalse("We should not have rescanned block " + first +
        ", because it should have been in recentSuspectBlocks.",
        info.goodBlocks.contains(first));
    info.blocksScanned = 0;
  }
}
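The rate limiting asserted at the end works because the scanner remembers recently scanned suspect blocks and drops re-marks for them. The sketch below illustrates the general idea with a plain time-expiring map; it is a hypothetical illustration of the concept, not the BlockScanner's recentSuspectBlocks implementation:

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class RecentSuspectCache {

    private final long expiryMillis;
    private final Map<String, Long> scannedAt = new HashMap<>();

    public RecentSuspectCache(long expiryMillis) {
        this.expiryMillis = expiryMillis;
    }

    // Record that a block was just scanned as a suspect.
    public synchronized void markScanned(String blockId) {
        scannedAt.put(blockId, System.currentTimeMillis());
    }

    // A re-mark is accepted only if the block has not been scanned
    // recently; otherwise it is dropped, which is the behavior the final
    // assertion in testMarkSuspectBlock checks for.
    public synchronized boolean shouldRescan(String blockId) {
        long now = System.currentTimeMillis();
        // Evict expired entries.
        for (Iterator<Map.Entry<String, Long>> it =
                scannedAt.entrySet().iterator(); it.hasNext();) {
            if (now - it.next().getValue() > expiryMillis) {
                it.remove();
            }
        }
        return !scannedAt.containsKey(blockId);
    }
}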
Use of java.util.concurrent.Semaphore in project hadoop by apache.
The class TestBlockReportRateLimiting, method testLeaseExpiration.
/**
 * Start a 2-node cluster with only one block report lease. When the
 * first datanode gets a lease, kill it. Then wait for the lease to
 * expire, and the second datanode to send a full block report.
 */
@Test(timeout = 180000)
public void testLeaseExpiration() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFS_NAMENODE_MAX_FULL_BLOCK_REPORT_LEASES, 1);
  conf.setLong(DFS_NAMENODE_FULL_BLOCK_REPORT_LEASE_LENGTH_MS, 100L);
  final Semaphore gotFbrSem = new Semaphore(0);
  final AtomicReference<String> failure = new AtomicReference<>();
  final AtomicReference<MiniDFSCluster> cluster = new AtomicReference<>();
  final AtomicReference<String> datanodeToStop = new AtomicReference<>();
  final BlockManagerFaultInjector injector = new BlockManagerFaultInjector() {

    @Override
    public void incomingBlockReportRpc(DatanodeID nodeID,
        BlockReportContext context) throws IOException {
      if (context.getLeaseId() == 0) {
        setFailure(failure, "Got unexpected rate-limiting-bypassing " +
            "full block report RPC from " + nodeID);
      }
      if (nodeID.getXferAddr().equals(datanodeToStop.get())) {
        throw new IOException("Injecting failure into block report RPC for " +
            nodeID);
      }
      gotFbrSem.release();
    }

    @Override
    public void requestBlockReportLease(DatanodeDescriptor node, long leaseId) {
      if (leaseId == 0) {
        return;
      }
      datanodeToStop.compareAndSet(null, node.getXferAddr());
    }

    @Override
    public void removeBlockReportLease(DatanodeDescriptor node, long leaseId) {
    }
  };
  try {
    BlockManagerFaultInjector.instance = injector;
    cluster.set(new MiniDFSCluster.Builder(conf).numDataNodes(2).build());
    cluster.get().waitActive();
    Assert.assertNotNull(cluster.get().stopDataNode(datanodeToStop.get()));
    gotFbrSem.acquire();
    Assert.assertNull(failure.get());
  } finally {
    if (cluster.get() != null) {
      cluster.get().shutdown();
    }
  }
}
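Here new Semaphore(0) is not guarding a shared resource; it is a cross-thread completion signal. The fault injector, running on an RPC handler thread, calls release() when a valid full block report arrives, and the test thread blocks in acquire() until that happens. The same pattern in isolation:

import java.util.concurrent.Semaphore;

public class SignalDemo {

    public static void main(String[] args) throws InterruptedException {
        // Zero initial permits: acquire() blocks until someone releases.
        final Semaphore done = new Semaphore(0);

        Thread rpcHandler = new Thread(() -> {
            // ... handle the incoming report, then signal the waiter.
            done.release();
        });
        rpcHandler.start();

        // Test thread: block here until the handler has run.
        done.acquire();
        System.out.println("got the signal");
    }
}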