use of java.util.concurrent.atomic.AtomicBoolean in project hadoop by apache.
the class TestBlockManager method testAsyncIBR.
/**
 * Spams the block manager with IBRs to verify queuing is occurring.
 *
 * Several writer threads concurrently create files made of many small
 * blocks while the main thread polls the "NameNodeActivity" metrics. The
 * test passes when the "BlockOpsQueued" gauge was seen above zero at least
 * once during the writes and the "BlockOpsBatched" counter is above zero
 * afterwards.
 *
 * @throws Exception if the cluster cannot be started or a check fails
 */
@Test
public void testAsyncIBR() throws Exception {
  Logger.getRootLogger().setLevel(Level.WARN);

  // will create files with many small blocks.
  final int blkSize = 4 * 1024;
  final int fileSize = blkSize * 100;
  final byte[] buf = new byte[2 * blkSize];
  final int numWriters = 4;
  final int repl = 3;

  final CyclicBarrier barrier = new CyclicBarrier(numWriters);
  final CountDownLatch writeLatch = new CountDownLatch(numWriters);
  final AtomicBoolean failure = new AtomicBoolean();

  final Configuration conf = new HdfsConfiguration();
  // The original called getLong() here and discarded the result, which
  // never changed the configuration. Actually set the minimum block size
  // so the small block size used below is explicitly permitted.
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, blkSize);
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(8).build();

  try {
    cluster.waitActive();
    // create multiple writer threads to create a file with many blocks.
    // will test that concurrent writing causes IBR batching in the NN
    Thread[] writers = new Thread[numWriters];
    for (int i = 0; i < writers.length; i++) {
      final Path p = new Path("/writer" + i);
      writers[i] = new Thread(new Runnable() {
        @Override
        public void run() {
          try {
            FileSystem fs = cluster.getFileSystem();
            // try-with-resources closes the stream even when a write fails.
            try (FSDataOutputStream os =
                fs.create(p, true, buf.length, (short) repl, blkSize)) {
              // align writers for maximum chance of IBR batching.
              barrier.await();
              int remaining = fileSize;
              while (remaining > 0) {
                os.write(buf);
                remaining -= buf.length;
              }
            }
          } catch (Exception e) {
            e.printStackTrace();
            failure.set(true);
          } finally {
            // let main thread know we are done, whatever the outcome, so
            // the polling loop below cannot wait on this writer forever.
            writeLatch.countDown();
          }
        }
      });
      writers[i].start();
    }

    // when and how many IBRs are queued is indeterminate, so just watch
    // the metrics and verify something was queued during execution.
    boolean sawQueued = false;
    while (!writeLatch.await(10, TimeUnit.MILLISECONDS)) {
      assertFalse(failure.get());
      MetricsRecordBuilder rb = getMetrics("NameNodeActivity");
      long queued = MetricsAsserts.getIntGauge("BlockOpsQueued", rb);
      sawQueued |= (queued > 0);
    }
    assertFalse(failure.get());
    assertTrue(sawQueued);

    // verify that batching of the IBRs occurred.
    MetricsRecordBuilder rb = getMetrics("NameNodeActivity");
    long batched = MetricsAsserts.getLongCounter("BlockOpsBatched", rb);
    assertTrue(batched > 0);
  } finally {
    cluster.shutdown();
  }
}
use of java.util.concurrent.atomic.AtomicBoolean in project hadoop by apache.
the class TestBlockRecovery method testRaceBetweenReplicaRecoveryAndFinalizeBlock.
/**
 * Verifies the race between finalizeBlock and lease recovery.
 *
 * A background thread takes the datanode's dataset lock, holds it for two
 * seconds and then calls initReplicaRecovery, while the main thread closes
 * the output stream in parallel. Once the background thread has finished,
 * the test requires that recovery initialisation did not throw and then
 * calls updateReplicaUnderRecovery with the bumped generation stamp.
 *
 * @throws Exception
 */
@Test(timeout = 20000)
public void testRaceBetweenReplicaRecoveryAndFinalizeBlock() throws Exception {
  // Stop the Mocked DN started in startup()
  tearDown();

  final Configuration clusterConf = new HdfsConfiguration();
  clusterConf.set(
      DFSConfigKeys.DFS_DATANODE_XCEIVER_STOP_TIMEOUT_MILLIS_KEY, "1000");
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(clusterConf).numDataNodes(1).build();

  try {
    cluster.waitClusterUp();
    final DistributedFileSystem dfs = cluster.getFileSystem();
    final Path testPath = new Path("/test");

    // Write a little data and sync it so the file has a located block.
    final FSDataOutputStream out = dfs.create(testPath);
    out.writeBytes("data");
    out.hsync();

    final List<LocatedBlock> located =
        DFSTestUtil.getAllBlocks(dfs.open(testPath));
    final LocatedBlock block = located.get(0);
    final DataNode dataNode = cluster.getDataNodes().get(0);

    // Flipped to false if the recovery thread hits any exception.
    final AtomicBoolean recoveryInitResult = new AtomicBoolean(true);
    final Thread recoveryThread = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          final DatanodeInfo[] targets = block.getLocations();
          final RecoveringBlock toRecover = new RecoveringBlock(
              block.getBlock(), targets,
              block.getBlock().getGenerationStamp() + 1);
          try (AutoCloseableLock datasetLock =
              dataNode.data.acquireDatasetLock()) {
            // Hold the lock for a while so close() overlaps with recovery.
            Thread.sleep(2000);
            dataNode.initReplicaRecovery(toRecover);
          }
        } catch (Exception e) {
          recoveryInitResult.set(false);
        }
      }
    });
    recoveryThread.start();

    try {
      out.close();
    } catch (IOException e) {
      Assert.assertTrue("Writing should fail",
          e.getMessage().contains("are bad. Aborting..."));
    } finally {
      recoveryThread.join();
    }

    Assert.assertTrue("Recovery should be initiated successfully",
        recoveryInitResult.get());

    dataNode.updateReplicaUnderRecovery(
        block.getBlock(),
        block.getBlock().getGenerationStamp() + 1,
        block.getBlock().getBlockId(),
        block.getBlockSize());
  } finally {
    // build() either returned a cluster or threw before this try block.
    cluster.shutdown();
  }
}
use of java.util.concurrent.atomic.AtomicBoolean in project hadoop by apache.
the class TestDataNodeHotSwapVolumes method testAddVolumesConcurrently.
/**
 * Adds many volumes to a DataNode concurrently while another thread keeps
 * listing the DataNode's storage directories, then checks that neither
 * activity reported an error and that the resulting block report covers
 * every volume.
 *
 * The dataset's addVolume is replaced (via a Mockito spy) by a version that
 * runs the real method on its own thread, optionally after a random delay,
 * and counts down a latch when it finishes.
 */
@Test(timeout = 180000)
public void testAddVolumesConcurrently() throws IOException, InterruptedException, TimeoutException, ReconfigurationException {
  startDFSCluster(1, 1, 10);
  int numVolumes = cluster.getStoragesPerDatanode();
  String blockPoolId = cluster.getNamesystem().getBlockPoolId();
  Path testFile = new Path("/test");

  // Each volume has 2 blocks
  int initialBlockCount = numVolumes * 2;
  createFile(testFile, initialBlockCount);

  DataNode dn = cluster.getDataNodes().get(0);
  final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
  dn.data = Mockito.spy(data);

  final int newVolumeCount = 40;
  List<Thread> addVolumeDelayedThreads = new ArrayList<>();
  AtomicBoolean addVolumeError = new AtomicBoolean(false);
  AtomicBoolean listStorageError = new AtomicBoolean(false);
  CountDownLatch addVolumeCompletionLatch = new CountDownLatch(newVolumeCount);

  // Thread to list all storage available at DataNode,
  // when the volumes are being added in parallel.
  final Thread listStorageThread = new Thread(new Runnable() {
    @Override
    public void run() {
      // Keep listing until every addVolume call has counted down.
      // The original condition (getCount() != newVolumeCount) was false at
      // start, because the latch is created with exactly that count, so the
      // thread normally exited without listing anything; had it started
      // after the first countdown it could never have terminated.
      while (addVolumeCompletionLatch.getCount() > 0) {
        int i = 0;
        while (i++ < 1000) {
          try {
            dn.getStorage().listStorageDirectories();
          } catch (Exception e) {
            listStorageError.set(true);
            LOG.error("Error listing storage: " + e);
          }
        }
      }
    }
  });
  listStorageThread.start();

  // FsDatasetImpl addVolume mocked to perform the operation asynchronously
  doAnswer(new Answer<Object>() {
    @Override
    public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
      final Random r = new Random();
      Thread addVolThread = new Thread(new Runnable() {
        @Override
        public void run() {
          try {
            r.setSeed(Time.now());
            // start after an initial delay.
            if (r.nextInt(10) > 4) {
              int s = r.nextInt(10) + 1;
              Thread.sleep(s * 100);
            }
            invocationOnMock.callRealMethod();
          } catch (Throwable throwable) {
            addVolumeError.set(true);
            LOG.error("Error adding volume: " + throwable);
          } finally {
            // Always count down so waiters are released even on failure.
            addVolumeCompletionLatch.countDown();
          }
        }
      });
      addVolumeDelayedThreads.add(addVolThread);
      addVolThread.start();
      return null;
    }
  }).when(dn.data).addVolume(any(StorageLocation.class), any(List.class));

  addVolumes(newVolumeCount, addVolumeCompletionLatch);
  numVolumes += newVolumeCount;

  // Wait for all addVolume and listStorage Threads to complete
  for (Thread t : addVolumeDelayedThreads) {
    t.join();
  }
  listStorageThread.join();

  // Verify errors while adding volumes and listing storage directories
  Assert.assertEquals("Error adding volumes!", false, addVolumeError.get());
  Assert.assertEquals("Error listing storage!", false, listStorageError.get());

  int additionalBlockCount = 9;
  int totalBlockCount = initialBlockCount + additionalBlockCount;

  // Continue to write the same file, thus the new volumes will have blocks.
  DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * additionalBlockCount);
  verifyFileLength(cluster.getFileSystem(), testFile, totalBlockCount);

  List<Map<DatanodeStorage, BlockListAsLongs>> blockReports = cluster.getAllBlockReports(blockPoolId);
  assertEquals(1, blockReports.size());
  assertEquals(numVolumes, blockReports.get(0).size());
}
use of java.util.concurrent.atomic.AtomicBoolean in project hadoop by apache.
the class TestDistributedScheduler method testDistributedScheduler.
/**
 * Drives a DistributedScheduler through a series of allocate calls against
 * a mocked downstream interceptor and checks where the opportunistic
 * containers land.
 *
 * The scheduler is registered with nodes "a" and "b". Every mocked allocate
 * response returns nodes "c" and "d", with their order flipped on each call.
 */
@Test
public void testDistributedScheduler() throws Exception {
  final Configuration conf = new Configuration();
  final DistributedScheduler scheduler = new DistributedScheduler();
  final RequestInterceptor tailInterceptor = setup(conf, scheduler);

  registerAM(scheduler, tailInterceptor, Arrays.asList(
      RemoteNode.newInstance(NodeId.newInstance("a", 1), "http://a:1"),
      RemoteNode.newInstance(NodeId.newInstance("b", 2), "http://b:2")));

  // Toggled on every mocked response to alternate the node ordering.
  final AtomicBoolean flipFlag = new AtomicBoolean(true);
  Mockito.when(tailInterceptor.allocateForDistributedScheduling(
      Mockito.any(DistributedSchedulingAllocateRequest.class)))
      .thenAnswer(new Answer<DistributedSchedulingAllocateResponse>() {
        @Override
        public DistributedSchedulingAllocateResponse answer(
            InvocationOnMock invocationOnMock) throws Throwable {
          flipFlag.set(!flipFlag.get());
          final RemoteNode nodeC =
              RemoteNode.newInstance(NodeId.newInstance("c", 3), "http://c:3");
          final RemoteNode nodeD =
              RemoteNode.newInstance(NodeId.newInstance("d", 4), "http://d:4");
          return flipFlag.get()
              ? createAllocateResponse(Arrays.asList(nodeC, nodeD))
              : createAllocateResponse(Arrays.asList(nodeD, nodeC));
        }
      });

  final NodeId hostA = NodeId.newInstance("a", 1);
  final NodeId hostB = NodeId.newInstance("b", 2);
  final NodeId hostC = NodeId.newInstance("c", 3);
  final NodeId hostD = NodeId.newInstance("d", 4);

  // The guaranteed request is reused in every ask list below.
  final ResourceRequest guaranteedReq =
      createResourceRequest(ExecutionType.GUARANTEED, 5, "*");

  // First allocate: 4 opportunistic containers.
  AllocateRequest request = Records.newRecord(AllocateRequest.class);
  request.setAskList(Arrays.asList(guaranteedReq,
      createResourceRequest(ExecutionType.OPPORTUNISTIC, 4, "*")));

  // Verify 4 containers were allocated
  AllocateResponse response = scheduler.allocate(request);
  Assert.assertEquals(4, response.getAllocatedContainers().size());

  // Verify equal distribution on hosts a and b, and none on c or d
  Map<NodeId, List<ContainerId>> byNode = mapAllocs(response, 4);
  Assert.assertEquals(2, byNode.get(hostA).size());
  Assert.assertEquals(2, byNode.get(hostB).size());
  Assert.assertNull(byNode.get(hostC));
  Assert.assertNull(byNode.get(hostD));

  // New Allocate request
  request = Records.newRecord(AllocateRequest.class);
  request.setAskList(Arrays.asList(guaranteedReq,
      createResourceRequest(ExecutionType.OPPORTUNISTIC, 6, "*")));

  // Verify 6 containers were allocated
  response = scheduler.allocate(request);
  Assert.assertEquals(6, response.getAllocatedContainers().size());

  // Verify new containers are equally distributed on hosts c and d,
  // and none on a or b
  byNode = mapAllocs(response, 6);
  Assert.assertEquals(3, byNode.get(hostC).size());
  Assert.assertEquals(3, byNode.get(hostD).size());
  Assert.assertNull(byNode.get(hostA));
  Assert.assertNull(byNode.get(hostB));

  // Ensure the DistributedScheduler respects the list order..
  // The first request should be allocated to "d" since it is ranked higher
  // The second request should be allocated to "c" since the ranking is
  // flipped on every allocate response.
  request = Records.newRecord(AllocateRequest.class);
  request.setAskList(Arrays.asList(guaranteedReq,
      createResourceRequest(ExecutionType.OPPORTUNISTIC, 1, "*")));
  response = scheduler.allocate(request);
  byNode = mapAllocs(response, 1);
  Assert.assertEquals(1, byNode.get(hostD).size());

  request = Records.newRecord(AllocateRequest.class);
  request.setAskList(Arrays.asList(guaranteedReq,
      createResourceRequest(ExecutionType.OPPORTUNISTIC, 1, "*")));
  response = scheduler.allocate(request);
  byNode = mapAllocs(response, 1);
  Assert.assertEquals(1, byNode.get(hostC).size());

  request = Records.newRecord(AllocateRequest.class);
  request.setAskList(Arrays.asList(guaranteedReq,
      createResourceRequest(ExecutionType.OPPORTUNISTIC, 1, "*")));
  response = scheduler.allocate(request);
  byNode = mapAllocs(response, 1);
  Assert.assertEquals(1, byNode.get(hostD).size());
}
use of java.util.concurrent.atomic.AtomicBoolean in project hadoop by apache.
the class TestDistributedShell method testDSShell.
/**
 * Runs the distributed shell client on a background thread, polls YARN
 * until the application's host and RPC port can be verified, waits for the
 * client to finish, and then checks the published timeline data.
 *
 * Relies on yarnCluster, timelineVersionWatcher, conf, fs and LOG, which
 * are defined outside this method.
 *
 * @param haveDomain  when true, timeline domain arguments ("--domain",
 *                    ACLs, "--create") are appended to the client arguments
 * @param defaultFlow when false and timeline version 2.0 is in use,
 *                    explicit flow name/version/run-id arguments are
 *                    appended
 * @throws Exception if the client, the YARN client or a check fails
 */
public void testDSShell(boolean haveDomain, boolean defaultFlow) throws Exception {
  String[] args = { "--jar", APPMASTER_JAR, "--num_containers", "2", "--shell_command", Shell.WINDOWS ? "dir" : "ls", "--master_memory", "512", "--master_vcores", "2", "--container_memory", "128", "--container_vcores", "1" };
  if (haveDomain) {
    String[] domainArgs = { "--domain", "TEST_DOMAIN", "--view_acls", "reader_user reader_group", "--modify_acls", "writer_user writer_group", "--create" };
    args = mergeArgs(args, domainArgs);
  }

  boolean isTestingTimelineV2 = false;
  if (timelineVersionWatcher.getTimelineVersion() == 2.0f) {
    isTestingTimelineV2 = true;
    if (!defaultFlow) {
      String[] flowArgs = { "--flow_name", "test_flow_name", "--flow_version", "test_flow_version", "--flow_run_id", "12345678" };
      args = mergeArgs(args, flowArgs);
    }
    LOG.info("Setup: Using timeline v2!");
  }

  LOG.info("Initializing DS Client");
  final Client client = new Client(new Configuration(yarnCluster.getConfig()));
  boolean initSuccess = client.init(args);
  Assert.assertTrue(initSuccess);

  LOG.info("Running DS Client");
  // Receives the client's return value from the background thread.
  final AtomicBoolean result = new AtomicBoolean(false);
  Thread t = new Thread() {
    @Override
    public void run() {
      try {
        result.set(client.run());
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  };
  t.start();

  YarnClient yarnClient = YarnClient.createYarnClient();
  yarnClient.init(new Configuration(yarnCluster.getConfig()));
  yarnClient.start();

  String hostName = NetUtils.getHostname();
  boolean verified = false;
  String errorMessage = "";
  ApplicationId appId = null;
  ApplicationReport appReport = null;
  try {
    while (!verified) {
      List<ApplicationReport> apps = yarnClient.getApplications();
      if (apps.size() == 0) {
        Thread.sleep(10);
        continue;
      }
      appReport = apps.get(0);
      appId = appReport.getApplicationId();
      // "N/A" means no host has been reported yet; poll again.
      if (appReport.getHost().equals("N/A")) {
        Thread.sleep(10);
        continue;
      }
      errorMessage = "Expected host name to start with '" + hostName + "', was '" + appReport.getHost() + "'. Expected rpc port to be '-1', was '" + appReport.getRpcPort() + "'.";
      if (checkHostname(appReport.getHost()) && appReport.getRpcPort() == -1) {
        verified = true;
      }
      // Stop polling once the application has finished with a final status,
      // whether or not verification succeeded.
      if (appReport.getYarnApplicationState() == YarnApplicationState.FINISHED && appReport.getFinalApplicationStatus() != FinalApplicationStatus.UNDEFINED) {
        break;
      }
    }
  } finally {
    // The YARN client is only needed for polling; stop it so the started
    // service is not leaked (the original never stopped it).
    yarnClient.stop();
  }
  Assert.assertTrue(errorMessage, verified);

  t.join();
  LOG.info("Client run completed for testDSShell. Result=" + result);
  Assert.assertTrue(result.get());

  if (timelineVersionWatcher.getTimelineVersion() == 1.5f) {
    long scanInterval = conf.getLong(YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_SCAN_INTERVAL_SECONDS, YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_SCAN_INTERVAL_SECONDS_DEFAULT);
    Path doneDir = new Path(YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_DONE_DIR_DEFAULT);
    // Wait till the data is moved to done dir.
    // NOTE(review): this loop has no timeout of its own; presumably the
    // calling tests supply one - confirm.
    while (true) {
      RemoteIterator<FileStatus> iterApps = fs.listStatusIterator(doneDir);
      if (iterApps.hasNext()) {
        break;
      }
      Thread.sleep(scanInterval * 2);
    }
  }

  if (!isTestingTimelineV2) {
    checkTimelineV1(haveDomain);
  } else {
    checkTimelineV2(haveDomain, appId, defaultFlow, appReport);
  }
}
Aggregations