Example 21 with TimeoutException

Use of java.util.concurrent.TimeoutException in project flink by apache.

From class RetryingRegistrationTest, method testCancellation.

@Test
public void testCancellation() throws Exception {
    final String testEndpointAddress = "my-test-address";
    final UUID leaderId = UUID.randomUUID();
    TestingRpcService rpc = new TestingRpcService();
    try {
        FlinkCompletableFuture<RegistrationResponse> result = new FlinkCompletableFuture<>();
        TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
        when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(result);
        rpc.registerGateway(testEndpointAddress, testGateway);
        TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
        registration.startRegistration();
        // cancel and fail the current registration attempt
        registration.cancel();
        result.completeExceptionally(new TimeoutException());
        // there should not be a second registration attempt
        verify(testGateway, atMost(1)).registrationCall(any(UUID.class), anyLong());
    } finally {
        rpc.stopService();
    }
}
Also used : TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) UUID(java.util.UUID) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)
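
The test above relies on completing the registration future exceptionally so that the already-cancelled attempt observes the failure and no retry is scheduled. A minimal standalone sketch of that mechanism, using the standard java.util.concurrent.CompletableFuture rather than Flink's FlinkCompletableFuture (the class name and message below are illustrative, not Flink code):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;

public class CompleteExceptionallyDemo {
    public static void main(String[] args) throws InterruptedException {
        CompletableFuture<String> result = new CompletableFuture<>();
        // Fail the pending "registration attempt" with a timeout, as the test does
        // with result.completeExceptionally(new TimeoutException()).
        result.completeExceptionally(new TimeoutException("registration timed out"));
        try {
            result.get();
        } catch (ExecutionException e) {
            // The TimeoutException surfaces to waiters as the cause of an ExecutionException.
            System.out.println("cause: " + e.getCause());
        }
    }
}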

Example 22 with TimeoutException

Use of java.util.concurrent.TimeoutException in project hadoop by apache.

From class StorageLocationChecker, method check.

/**
   * Initiate a check of the supplied storage volumes and return
   * the list of healthy volumes.
   *
   * StorageLocations are returned in the same order as the input
   * for compatibility with existing unit tests.
   *
   * @param conf HDFS configuration.
   * @param dataDirs list of volumes to check.
   * @return the list of healthy volumes; volumes that failed the
   *         check are omitted.
   *
   * @throws InterruptedException if the check was interrupted.
   * @throws IOException if the number of failed volumes exceeds the
   *                     maximum allowed or if there are no good
   *                     volumes.
   */
public List<StorageLocation> check(final Configuration conf, final Collection<StorageLocation> dataDirs) throws InterruptedException, IOException {
    final HashMap<StorageLocation, Boolean> goodLocations = new LinkedHashMap<>();
    final Set<StorageLocation> failedLocations = new HashSet<>();
    final Map<StorageLocation, ListenableFuture<VolumeCheckResult>> futures = Maps.newHashMap();
    final LocalFileSystem localFS = FileSystem.getLocal(conf);
    final CheckContext context = new CheckContext(localFS, expectedPermission);
    // Start parallel disk check operations on all StorageLocations.
    for (StorageLocation location : dataDirs) {
        goodLocations.put(location, true);
        Optional<ListenableFuture<VolumeCheckResult>> olf = delegateChecker.schedule(location, context);
        if (olf.isPresent()) {
            futures.put(location, olf.get());
        }
    }
    if (maxVolumeFailuresTolerated >= dataDirs.size()) {
        throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " + maxVolumeFailuresTolerated + ". Value configured is >= " + "to the number of configured volumes (" + dataDirs.size() + ").");
    }
    final long checkStartTimeMs = timer.monotonicNow();
    // Retrieve the results of the disk checks.
    for (Map.Entry<StorageLocation, ListenableFuture<VolumeCheckResult>> entry : futures.entrySet()) {
        // Determine how much time we can allow for this check to complete.
        // The cumulative wait time cannot exceed maxAllowedTimeForCheck.
        final long waitSoFarMs = (timer.monotonicNow() - checkStartTimeMs);
        final long timeLeftMs = Math.max(0, maxAllowedTimeForCheckMs - waitSoFarMs);
        final StorageLocation location = entry.getKey();
        try {
            final VolumeCheckResult result = entry.getValue().get(timeLeftMs, TimeUnit.MILLISECONDS);
            switch(result) {
                case HEALTHY:
                    break;
                case DEGRADED:
                    LOG.warn("StorageLocation {} appears to be degraded.", location);
                    break;
                case FAILED:
                    LOG.warn("StorageLocation {} detected as failed.", location);
                    failedLocations.add(location);
                    goodLocations.remove(location);
                    break;
                default:
                    LOG.error("Unexpected health check result {} for StorageLocation {}", result, location);
            }
        } catch (ExecutionException | TimeoutException e) {
            LOG.warn("Exception checking StorageLocation " + location, e.getCause());
            failedLocations.add(location);
            goodLocations.remove(location);
        }
    }
    if (failedLocations.size() > maxVolumeFailuresTolerated) {
        throw new DiskErrorException("Too many failed volumes - " + "current valid volumes: " + goodLocations.size() + ", volumes configured: " + dataDirs.size() + ", volumes failed: " + failedLocations.size() + ", volume failures tolerated: " + maxVolumeFailuresTolerated);
    }
    if (goodLocations.size() == 0) {
        throw new DiskErrorException("All directories in " + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + failedLocations);
    }
    return new ArrayList<>(goodLocations.keySet());
}
Also used : CheckContext(org.apache.hadoop.hdfs.server.datanode.StorageLocation.CheckContext) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StorageLocation(org.apache.hadoop.hdfs.server.datanode.StorageLocation) ExecutionException(java.util.concurrent.ExecutionException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)
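
The timeout handling in check() comes down to one idiom: every future's get() is bounded by whatever remains of a single overall budget, and either an ExecutionException or a TimeoutException marks that volume as failed. Below is a minimal sketch of the same bookkeeping with a plain ExecutorService; BoundedWaitDemo, maxAllowedTimeMs, and the simulated delays are illustrative assumptions, not Hadoop APIs:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

public class BoundedWaitDemo {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(3);
        List<Future<Boolean>> checks = new ArrayList<>();
        for (int i = 0; i < 3; i++) {
            final long delayMs = i * 300L;          // simulated per-volume check duration
            checks.add(pool.submit(() -> { Thread.sleep(delayMs); return true; }));
        }
        final long maxAllowedTimeMs = 500;          // overall budget, like maxAllowedTimeForCheckMs
        final long startMs = System.nanoTime() / 1_000_000;
        int failed = 0;
        for (Future<Boolean> check : checks) {
            // Shrink the per-future timeout so the cumulative wait never exceeds the budget.
            long waitSoFarMs = System.nanoTime() / 1_000_000 - startMs;
            long timeLeftMs = Math.max(0, maxAllowedTimeMs - waitSoFarMs);
            try {
                check.get(timeLeftMs, TimeUnit.MILLISECONDS);
            } catch (ExecutionException | TimeoutException e) {
                failed++;                           // a slow or failing check counts as a failed volume
            }
        }
        System.out.println("failed checks: " + failed);
        pool.shutdownNow();
    }
}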

Example 23 with TimeoutException

Use of java.util.concurrent.TimeoutException in project hadoop by apache.

From class MiniDFSCluster, method setDataNodeStorageCapacities.

private synchronized void setDataNodeStorageCapacities(final int curDnIdx, final DataNode curDn, long[][] storageCapacities) throws IOException {
    if (storageCapacities == null || storageCapacities.length == 0) {
        return;
    }
    try {
        waitDataNodeFullyStarted(curDn);
    } catch (TimeoutException | InterruptedException e) {
        throw new IOException(e);
    }
    try (FsDatasetSpi.FsVolumeReferences volumes = curDn.getFSDataset().getFsVolumeReferences()) {
        assert storageCapacities[curDnIdx].length == storagesPerDatanode;
        assert volumes.size() == storagesPerDatanode;
        int j = 0;
        for (FsVolumeSpi fvs : volumes) {
            FsVolumeImpl volume = (FsVolumeImpl) fvs;
            LOG.info("setCapacityForTesting " + storageCapacities[curDnIdx][j] + " for [" + volume.getStorageType() + "]" + volume.getStorageID());
            volume.setCapacityForTesting(storageCapacities[curDnIdx][j]);
            j++;
        }
    }
    DataNodeTestUtils.triggerHeartbeat(curDn);
}
Also used : FsVolumeImpl(org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException)
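
The only TimeoutException handling here is the translation at the top of the method: a timeout (or interrupt) while waiting for the DataNode to start is rethrown as an IOException so callers deal with a single checked type. A small sketch of that wrap-and-rethrow idiom follows, with a hypothetical polling helper standing in for waitDataNodeFullyStarted; none of these names come from MiniDFSCluster:

import java.io.IOException;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public class WaitThenWrapDemo {

    // Hypothetical stand-in for waitDataNodeFullyStarted: poll a condition until it
    // holds or the deadline passes, throwing TimeoutException on expiry.
    static void waitFor(BooleanSupplier condition, long timeoutMs)
            throws TimeoutException, InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new TimeoutException("condition not met within " + timeoutMs + " ms");
            }
            Thread.sleep(50);
        }
    }

    // The same translation the MiniDFSCluster code performs: collapse the two checked
    // exceptions from the wait into an IOException for the caller.
    static void ensureStarted() throws IOException {
        try {
            waitFor(() -> false, 200);   // a condition that never holds, to force the timeout
        } catch (TimeoutException | InterruptedException e) {
            throw new IOException(e);
        }
    }

    public static void main(String[] args) {
        try {
            ensureStarted();
        } catch (IOException e) {
            System.out.println("wrapped cause: " + e.getCause());
        }
    }
}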

Example 24 with TimeoutException

Use of java.util.concurrent.TimeoutException in project hadoop by apache.

From class TestReplication, method testNoExtraReplicationWhenBlockReceivedIsLate.

/**
   * This test makes sure that, when a file is closed before all
   * of the datanodes in the pipeline have reported their replicas,
   * the NameNode doesn't consider the block under-replicated too
   * aggressively. It is a regression test for HDFS-1172.
   */
@Test(timeout = 60000)
public void testNoExtraReplicationWhenBlockReceivedIsLate() throws Exception {
    LOG.info("Test block replication when blockReceived is late");
    final short numDataNodes = 3;
    final short replication = 3;
    final Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    final String testFile = "/replication-test-file";
    final Path testPath = new Path(testFile);
    final BlockManager bm = cluster.getNameNode().getNamesystem().getBlockManager();
    try {
        cluster.waitActive();
        // Artificially delay IBR from 1 DataNode.
        // This ensures that the client's completeFile() RPC will get to the
        // NN before some of the replicas are reported.
        NameNode nn = cluster.getNameNode();
        DataNode dn = cluster.getDataNodes().get(0);
        DatanodeProtocolClientSideTranslatorPB spy = InternalDataNodeTestUtils.spyOnBposToNN(dn, nn);
        DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG);
        Mockito.doAnswer(delayer).when(spy).blockReceivedAndDeleted(Mockito.<DatanodeRegistration>anyObject(), Mockito.anyString(), Mockito.<StorageReceivedDeletedBlocks[]>anyObject());
        FileSystem fs = cluster.getFileSystem();
        // Create and close a small file with two blocks
        DFSTestUtil.createFile(fs, testPath, 1500, replication, 0);
        // schedule replication via BlockManager#computeReplicationWork
        BlockManagerTestUtil.computeAllPendingWork(bm);
        // Initially, should have some pending replication since the close()
        // is earlier than at least one of the reportReceivedDeletedBlocks calls
        assertTrue(pendingReplicationCount(bm) > 0);
        // release pending IBR.
        delayer.waitForCall();
        delayer.proceed();
        delayer.waitForResult();
        // make sure DataNodes perform any pending replication work
        for (DataNode d : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(d);
        }
        // Wait until there is nothing pending
        try {
            GenericTestUtils.waitFor(new Supplier<Boolean>() {

                @Override
                public Boolean get() {
                    return pendingReplicationCount(bm) == 0;
                }
            }, 100, 3000);
        } catch (TimeoutException e) {
            fail("timed out while waiting for no pending replication.");
        }
        // Check that none of the datanodes have serviced a replication request.
        // i.e. that the NameNode didn't schedule any spurious replication.
        assertNoReplicationWasPerformed(cluster);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Configuration(org.apache.hadoop.conf.Configuration) MetricsRecordBuilder(org.apache.hadoop.metrics2.MetricsRecordBuilder) DelayAnswer(org.apache.hadoop.test.GenericTestUtils.DelayAnswer) DatanodeProtocolClientSideTranslatorPB(org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) StorageReceivedDeletedBlocks(org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)
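
The TimeoutException here is thrown by GenericTestUtils.waitFor when the pending-replication count never drains, but the test's ordering trick is the DelayAnswer: the stubbed blockReceivedAndDeleted call blocks until the test releases it. Below is a stripped-down Mockito sketch of that hold-and-release pattern; the Reporter interface and all names are hypothetical, not HDFS types:

import static org.mockito.Mockito.*;

import java.util.concurrent.CountDownLatch;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

public class DelayAnswerSketch {

    // Hypothetical callback interface standing in for the spied DatanodeProtocol translator.
    interface Reporter {
        void blockReceived(String blockId);
    }

    // Analogue of GenericTestUtils.DelayAnswer: the stubbed call parks until released.
    static class Delayer implements Answer<Void> {
        final CountDownLatch called = new CountDownLatch(1);
        final CountDownLatch release = new CountDownLatch(1);

        @Override
        public Void answer(InvocationOnMock invocation) throws InterruptedException {
            called.countDown();                 // signal that the call has arrived
            release.await();                    // hold the call until the test proceeds
            return null;
        }
    }

    public static void main(String[] args) throws InterruptedException {
        Reporter reporter = mock(Reporter.class);
        Delayer delayer = new Delayer();
        doAnswer(delayer).when(reporter).blockReceived(anyString());

        Thread reportingThread = new Thread(() -> reporter.blockReceived("blk_1"));
        reportingThread.start();

        delayer.called.await();                 // like delayer.waitForCall()
        // ... assert on state that exists while the report is still pending ...
        delayer.release.countDown();            // like delayer.proceed()
        reportingThread.join();
        verify(reporter).blockReceived("blk_1");
    }
}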

Example 25 with TimeoutException

Use of java.util.concurrent.TimeoutException in project hadoop by apache.

From class TestBlockReaderLocal, method testStatistics.

private void testStatistics(boolean isShortCircuit) throws Exception {
    Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null);
    HdfsConfiguration conf = new HdfsConfiguration();
    TemporarySocketDirectory sockDir = null;
    if (isShortCircuit) {
        DFSInputStream.tcpReadsDisabledForTesting = true;
        sockDir = new TemporarySocketDirectory();
        conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestStatisticsForLocalRead.%d.sock").getAbsolutePath());
        conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);
        DomainSocket.disableBindPathValidation();
    } else {
        conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, false);
    }
    MiniDFSCluster cluster = null;
    final Path TEST_PATH = new Path("/a");
    final long RANDOM_SEED = 4567L;
    FSDataInputStream fsIn = null;
    byte[] original = new byte[BlockReaderLocalTest.TEST_LENGTH];
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).hosts(new String[] { NetUtils.getLocalHostname() }).build();
        cluster.waitActive();
        fs = cluster.getFileSystem();
        DFSTestUtil.createFile(fs, TEST_PATH, BlockReaderLocalTest.TEST_LENGTH, (short) 1, RANDOM_SEED);
        try {
            DFSTestUtil.waitReplication(fs, TEST_PATH, (short) 1);
        } catch (InterruptedException e) {
            Assert.fail("unexpected InterruptedException during " + "waitReplication: " + e);
        } catch (TimeoutException e) {
            Assert.fail("unexpected TimeoutException during " + "waitReplication: " + e);
        }
        fsIn = fs.open(TEST_PATH);
        IOUtils.readFully(fsIn, original, 0, BlockReaderLocalTest.TEST_LENGTH);
        HdfsDataInputStream dfsIn = (HdfsDataInputStream) fsIn;
        Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH, dfsIn.getReadStatistics().getTotalBytesRead());
        Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH, dfsIn.getReadStatistics().getTotalLocalBytesRead());
        if (isShortCircuit) {
            Assert.assertEquals(BlockReaderLocalTest.TEST_LENGTH, dfsIn.getReadStatistics().getTotalShortCircuitBytesRead());
        } else {
            Assert.assertEquals(0, dfsIn.getReadStatistics().getTotalShortCircuitBytesRead());
        }
        fsIn.close();
        fsIn = null;
    } finally {
        DFSInputStream.tcpReadsDisabledForTesting = false;
        if (fsIn != null)
            fsIn.close();
        if (fs != null)
            fs.close();
        if (cluster != null)
            cluster.shutdown();
        if (sockDir != null)
            sockDir.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) TemporarySocketDirectory(org.apache.hadoop.net.unix.TemporarySocketDirectory) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) HdfsDataInputStream(org.apache.hadoop.hdfs.client.HdfsDataInputStream) TimeoutException(java.util.concurrent.TimeoutException)
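
In this test a TimeoutException can only come out of the waitReplication setup step, so it is converted directly into Assert.fail with a descriptive message instead of being allowed to propagate and obscure the real subject of the test. A minimal JUnit sketch of that convention; the replicationDone future is a hypothetical stand-in for the replication wait:

import static org.junit.Assert.fail;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.junit.Test;

public class TimeoutToFailureTest {

    @Test
    public void setupTimeoutBecomesTestFailure() {
        CompletableFuture<Void> replicationDone = new CompletableFuture<>();
        replicationDone.complete(null);          // completes immediately in this sketch
        try {
            replicationDone.get(5, TimeUnit.SECONDS);
        } catch (InterruptedException | ExecutionException e) {
            fail("unexpected exception during setup: " + e);
        } catch (TimeoutException e) {
            // Same idea as the waitReplication handling above: a setup timeout is a
            // plain test failure, not part of the behaviour under test.
            fail("unexpected TimeoutException during setup: " + e);
        }
    }
}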

Aggregations

TimeoutException (java.util.concurrent.TimeoutException): 717
ExecutionException (java.util.concurrent.ExecutionException): 229
IOException (java.io.IOException): 167
Test (org.junit.Test): 131
CountDownLatch (java.util.concurrent.CountDownLatch): 73
ArrayList (java.util.ArrayList): 67
ExecutorService (java.util.concurrent.ExecutorService): 62
Future (java.util.concurrent.Future): 45
CancellationException (java.util.concurrent.CancellationException): 44
Test (org.testng.annotations.Test): 44
File (java.io.File): 34
List (java.util.List): 34
Map (java.util.Map): 32
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 32
HashMap (java.util.HashMap): 26
TimeUnit (java.util.concurrent.TimeUnit): 26
AtomicReference (java.util.concurrent.atomic.AtomicReference): 23
RejectedExecutionException (java.util.concurrent.RejectedExecutionException): 21
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 21
URI (java.net.URI): 20