Use of java.util.concurrent.TimeoutException in project flink by apache: the class RetryingRegistrationTest, method testCancellation.
@Test
public void testCancellation() throws Exception {
  final String testEndpointAddress = "my-test-address";
  final UUID leaderId = UUID.randomUUID();
  TestingRpcService rpc = new TestingRpcService();
  try {
    FlinkCompletableFuture<RegistrationResponse> result = new FlinkCompletableFuture<>();
    TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
    when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(result);
    rpc.registerGateway(testEndpointAddress, testGateway);
    TestRetryingRegistration registration =
        new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
    registration.startRegistration();
    // cancel and fail the current registration attempt
    registration.cancel();
    result.completeExceptionally(new TimeoutException());
    // there should not be a second registration attempt
    verify(testGateway, atMost(1)).registrationCall(any(UUID.class), anyLong());
  } finally {
    rpc.stopService();
  }
}
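The test relies on Flink test utilities (TestingRpcService, TestRetryingRegistration, FlinkCompletableFuture), but the pattern it exercises — cancel first, then fail the in-flight attempt, and verify that no retry follows — can be sketched with the standard library alone. In the sketch below the canceled flag, the attempts counter, and the retry callback are hypothetical stand-ins for RetryingRegistration's internal state, not Flink APIs.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

public class CancelBeforeTimeoutSketch {
  public static void main(String[] args) {
    AtomicBoolean canceled = new AtomicBoolean(false);
    AtomicInteger attempts = new AtomicInteger(1); // the first attempt is already in flight

    CompletableFuture<String> attempt = new CompletableFuture<>();

    // Retry only if the attempt failed with a timeout and nobody canceled.
    attempt.whenComplete((response, failure) -> {
      if (failure instanceof TimeoutException && !canceled.get()) {
        attempts.incrementAndGet(); // a real implementation would schedule a new attempt here
      }
    });

    // Cancel first, then fail the in-flight attempt, mirroring the test above.
    canceled.set(true);
    attempt.completeExceptionally(new TimeoutException("registration timed out"));

    // The timeout arriving after cancel() must not trigger a second attempt.
    System.out.println("attempts = " + attempts.get()); // prints 1
  }
}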
Use of java.util.concurrent.TimeoutException in project hadoop by apache: the class StorageLocationChecker, method check.
/**
* Initiate a check of the supplied storage volumes and return
* the list of healthy volumes.
*
* StorageLocations are returned in the same order as the input
* for compatibility with existing unit tests.
*
* @param conf HDFS configuration.
* @param dataDirs list of volumes to check.
* @return the list of volumes that passed the check; failed volumes are
*         excluded from the result.
*
* @throws InterruptedException if the check was interrupted.
* @throws IOException if the number of failed volumes exceeds the
*                     maximum allowed or if there are no good
*                     volumes.
*/
public List<StorageLocation> check(final Configuration conf,
    final Collection<StorageLocation> dataDirs)
    throws InterruptedException, IOException {
  final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
  final Set<StorageLocation> failedLocations = new HashSet<>();
  final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures =
      Maps.newHashMap();
  final LocalFileSystem localFS = FileSystem.getLocal(conf);
  final CheckContext context = new CheckContext(localFS, expectedPermission);
  // Start parallel disk check operations on all StorageLocations.
  for (StorageLocation location : dataDirs) {
    goodLocations.put(location, true);
    Optional<ListenableFuture<VolumeCheckResult>> olf =
        delegateChecker.schedule(location, context);
    if (olf.isPresent()) {
      futures.put(location, olf.get());
    }
  }
  if (maxVolumeFailuresTolerated >= dataDirs.size()) {
    throw new DiskErrorException("Invalid value configured for "
        + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - "
        + maxVolumeFailuresTolerated + ". Value configured is >= "
        + "to the number of configured volumes (" + dataDirs.size() + ").");
  }
  final long checkStartTimeMs = timer.monotonicNow();
  // Retrieve the results of the disk checks.
  for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry :
      futures.entrySet()) {
    // Determine how much time we can allow for this check to complete.
    // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
    final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
    final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
    final StorageLocation location = entry.getKey();
    try {
      final VolumeCheckResult result =
          entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
      switch (result) {
        case HEALTHY:
          break;
        case DEGRADED:
          LOG.warn("StorageLocation {} appears to be degraded.", location);
          break;
        case FAILED:
          LOG.warn("StorageLocation {} detected as failed.", location);
          failedLocations.add(location);
          goodLocations.remove(location);
          break;
        default:
          LOG.error("Unexpected health check result {} for StorageLocation {}",
              result, location);
      }
    } catch (ExecutionException | TimeoutException e) {
      LOG.warn("Exception checking StorageLocation " + location, e.getCause());
      failedLocations.add(location);
      goodLocations.remove(location);
    }
  }
  if (failedLocations.size() > maxVolumeFailuresTolerated) {
    throw new DiskErrorException("Too many failed volumes - "
        + "current valid volumes: " + goodLocations.size()
        + ", volumes configured: " + dataDirs.size()
        + ", volumes failed: " + failedLocations.size()
        + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
  }
  if (goodLocations.size() == 0) {
    throw new DiskErrorException("All directories in "
        + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
  }
  return new ArrayList<>(goodLocations.keySet());
}
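The key detail in check() is the shared deadline: every future is awaited with get(timeout), but the timeout handed to each call is only whatever remains of one overall budget (maxAllowedTimeForCheckMs), and an ExecutionException or TimeoutException marks that single volume as failed instead of aborting the whole loop. Here is a minimal sketch of that cumulative-deadline pattern using only java.util.concurrent — no Guava ListenableFuture and no HDFS types; the volume names, sleep durations, and the 2-second budget are made up for illustration.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class BoundedCheckSketch {
  public static void main(String[] args) throws InterruptedException {
    final long maxAllowedTimeMs = 2000; // overall budget shared by all checks
    ExecutorService pool = Executors.newFixedThreadPool(3);

    // Hypothetical "volumes": each check just sleeps for a different time.
    Map<String, Future<Boolean>> futures = new HashMap<>();
    for (long delayMs : new long[] { 100, 500, 5000 }) {
      futures.put("volume-" + delayMs, pool.submit(() -> {
        Thread.sleep(delayMs);
        return true; // healthy
      }));
    }

    List<String> failed = new ArrayList<>();
    final long startNanos = System.nanoTime();
    for (Map.Entry<String, Future<Boolean>> entry : futures.entrySet()) {
      // Each get() only waits for whatever remains of the overall budget.
      long waitedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
      long timeLeftMs = Math.max(0, maxAllowedTimeMs - waitedMs);
      try {
        entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
      } catch (ExecutionException | TimeoutException e) {
        failed.add(entry.getKey()); // slow or broken check: mark it failed, keep going
      }
    }

    System.out.println("failed checks: " + failed); // expected: [volume-5000]
    pool.shutdownNow();
  }
}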
Use of java.util.concurrent.TimeoutException in project hadoop by apache: the class MiniDFSCluster, method setDataNodeStorageCapacities.
private synchronized void setDataNodeStorageCapacities(final int curDnIdx,
    final DataNode curDn, long[][] storageCapacities) throws IOException {
  if (storageCapacities == null || storageCapacities.length == 0) {
    return;
  }
  try {
    waitDataNodeFullyStarted(curDn);
  } catch (TimeoutException | InterruptedException e) {
    throw new IOException(e);
  }
  try (FsDatasetSpi.FsVolumeReferences volumes =
      curDn.getFSDataset().getFsVolumeReferences()) {
    assert storageCapacities[curDnIdx].length == storagesPerDatanode;
    assert volumes.size() == storagesPerDatanode;
    int j = 0;
    for (FsVolumeSpi fvs : volumes) {
      FsVolumeImpl volume = (FsVolumeImpl) fvs;
      LOG.info("setCapacityForTesting " + storageCapacities[curDnIdx][j]
          + " for [" + volume.getStorageType() + "]" + volume.getStorageID());
      volume.setCapacityForTesting(storageCapacities[curDnIdx][j]);
      j++;
    }
  }
  DataNodeTestUtils.triggerHeartbeat(curDn);
}
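The noteworthy part of this helper is the boundary translation: waitDataNodeFullyStarted can fail with TimeoutException or InterruptedException, but the method only declares IOException, so both are rethrown wrapped, with the original exception kept as the cause. A minimal self-contained sketch of that wrapping pattern follows; awaitReady and the never-completing future are hypothetical stand-ins, not MiniDFSCluster APIs.

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class WrapTimeoutSketch {
  // Hypothetical helper: wait for a readiness signal, translating the
  // concurrency-level checked exceptions into the IOException callers expect.
  static void awaitReady(CompletableFuture<Void> ready, long timeoutMs) throws IOException {
    try {
      ready.get(timeoutMs, TimeUnit.MILLISECONDS);
    } catch (TimeoutException | InterruptedException | ExecutionException e) {
      throw new IOException("node did not become ready within " + timeoutMs + " ms", e);
    }
  }

  public static void main(String[] args) {
    CompletableFuture<Void> neverReady = new CompletableFuture<>();
    try {
      awaitReady(neverReady, 200);
    } catch (IOException e) {
      // The cause is still the original TimeoutException.
      System.out.println("caught: " + e.getMessage() + ", cause: " + e.getCause());
    }
  }
}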
Use of java.util.concurrent.TimeoutException in project hadoop by apache: the class TestReplication, method testNoExtraReplicationWhenBlockReceivedIsLate.
/**
* This test makes sure that, when a file is closed before all
* of the datanodes in the pipeline have reported their replicas,
* the NameNode doesn't consider the block under-replicated too
* aggressively. It is a regression test for HDFS-1172.
*/
@Test(timeout = 60000)
public void testNoExtraReplicationWhenBlockReceivedIsLate() throws Exception {
  LOG.info("Test block replication when blockReceived is late");
  final short numDataNodes = 3;
  final short replication = 3;
  final Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
  final String testFile = "/replication-test-file";
  final Path testPath = new Path(testFile);
  final BlockManager bm =
      cluster.getNameNode().getNamesystem().getBlockManager();
  try {
    cluster.waitActive();
    // Artificially delay IBR from 1 DataNode.
    // This ensures that the client's completeFile() RPC will get to the
    // NN before some of the replicas are reported.
    NameNode nn = cluster.getNameNode();
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
        InternalDataNodeTestUtils.spyOnBposToNN(dn, nn);
    DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(spy).blockReceivedAndDeleted(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageReceivedDeletedBlocks[]>anyObject());
    FileSystem fs = cluster.getFileSystem();
    // Create and close a small file with two blocks
    DFSTestUtil.createFile(fs, testPath, 1500, replication, 0);
    // schedule replication via BlockManager#computeReplicationWork
    BlockManagerTestUtil.computeAllPendingWork(bm);
    // Initially, should have some pending replication since the close()
    // is earlier than at least one of the reportReceivedDeletedBlocks calls
    assertTrue(pendingReplicationCount(bm) > 0);
    // release pending IBR.
    delayer.waitForCall();
    delayer.proceed();
    delayer.waitForResult();
    // make sure DataNodes do replication work if any exists
    for (DataNode d : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerHeartbeat(d);
    }
    // Wait until there is nothing pending
    try {
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override
        public Boolean get() {
          return pendingReplicationCount(bm) == 0;
        }
      }, 100, 3000);
    } catch (TimeoutException e) {
      fail("timed out while waiting for no pending replication.");
    }
    // Check that none of the datanodes have serviced a replication request,
    // i.e. that the NameNode didn't schedule any spurious replication.
    assertNoReplicationWasPerformed(cluster);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
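GenericTestUtils.waitFor is a Hadoop test helper that polls a condition at a fixed interval and throws TimeoutException if it never becomes true within the overall limit; the test converts that into a JUnit failure. A minimal stand-alone sketch of the poll-until-timeout pattern is below; waitFor here is a simplified stand-in for the Hadoop helper, and the pending counter merely simulates a draining replication queue.

import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

public class WaitForSketch {
  // Simplified stand-in for GenericTestUtils.waitFor: poll the condition every
  // checkEveryMs until it holds, or throw TimeoutException once timeoutMs elapses.
  static void waitFor(Supplier<Boolean> condition, long checkEveryMs, long timeoutMs)
      throws InterruptedException, TimeoutException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!condition.get()) {
      if (System.currentTimeMillis() > deadline) {
        throw new TimeoutException("condition not met within " + timeoutMs + " ms");
      }
      Thread.sleep(checkEveryMs);
    }
  }

  public static void main(String[] args) throws InterruptedException {
    AtomicInteger pending = new AtomicInteger(3); // simulated pending replication count
    // Background work drains the pending count, like DataNodes finishing transfers.
    new Thread(() -> {
      while (pending.get() > 0) {
        pending.decrementAndGet();
        try {
          Thread.sleep(50);
        } catch (InterruptedException ignored) {
          return;
        }
      }
    }).start();

    try {
      waitFor(() -> pending.get() == 0, 100, 3000);
      System.out.println("nothing pending");
    } catch (TimeoutException e) {
      // In the test above this would be fail(...).
      System.out.println("timed out while waiting: " + e.getMessage());
    }
  }
}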
Use of java.util.concurrent.TimeoutException in project hadoop by apache: the class TestBlockReaderLocal, method testStatistics.
private void testStatistics(boolean isShortCircuit) throws Exception {
  Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null);
  HdfsConfiguration conf = new HdfsConfiguration();
  TemporarySocketDirectory sockDir = null;
  if (isShortCircuit) {
    DFSInputStream.tcpReadsDisabledForTesting = true;
    sockDir = new TemporarySocketDirectory();
    conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
        new File(sockDir.getDir(), "TestStatisticsForLocalRead.%d.sock").getAbsolutePath());
    conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);
    DomainSocket.disableBindPathValidation();
  } else {
    conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, false);
  }
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final long RANDOM_SEED = 4567L;
  FSDataInputStream fsIn = null;
  byte[] original = new byte[BlockReaderLocalTest.TEST_LENGTH];
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .hosts(new String[] { NetUtils.getLocalHostname() }).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH, BlockReaderLocalTest.TEST_LENGTH,
        (short) 1, RANDOM_SEED);
    try {
      DFSTestUtil.waitReplication(fs, TEST_PATH, (short) 1);
    } catch (InterruptedException e) {
      Assert.fail("unexpected InterruptedException during "
          + "waitReplication: " + e);
    } catch (TimeoutException e) {
      Assert.fail("unexpected TimeoutException during "
          + "waitReplication: " + e);
    }
    fsIn = fs.open(TEST_PATH);
    IOUtils.readFully(fsIn, original, 0, BlockReaderLocalTest.TEST_LENGTH);
    HdfsDataInputStream dfsIn = (HdfsDataInputStream) fsIn;
    Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH,
        dfsIn.getReadStatistics().getTotalBytesRead());
    Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH,
        dfsIn.getReadStatistics().getTotalLocalBytesRead());
    if (isShortCircuit) {
      Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH,
          dfsIn.getReadStatistics().getTotalShortCircuitBytesRead());
    } else {
      Assert.assertEquals(0,
          dfsIn.getReadStatistics().getTotalShortCircuitBytesRead());
    }
    fsIn.close();
    fsIn = null;
  } finally {
    DFSInputStream.tcpReadsDisabledForTesting = false;
    if (fsIn != null)
      fsIn.close();
    if (fs != null)
      fs.close();
    if (cluster != null)
      cluster.shutdown();
    if (sockDir != null)
      sockDir.close();
  }
}