Search in sources :

Example 26 with SocketTimeoutException

use of java.net.SocketTimeoutException in project hbase by apache.

the class HBaseInterClusterReplicationEndpoint method replicate.

/**
   * Do the shipping logic: split the entries of the given context into batches, submit each
   * batch to the executor, and keep retrying the batches that failed for as long as this
   * endpoint is running and the executor has not been shut down.
   *
   * @param replicateContext supplies the WAL entries to ship, their total size and the WAL
   *          group id used when updating metrics
   * @return {@code true} once every entry has been replicated; {@code false} when no sink is
   *         available, when the replicated count does not match the number of entries, or
   *         when the retry loop exits before everything was shipped
   */
@Override
public boolean replicate(ReplicateContext replicateContext) {
    CompletionService<Integer> pool = new ExecutorCompletionService<>(this.exec);
    List<Entry> entries = replicateContext.getEntries();
    String walGroupId = replicateContext.getWalGroupId();
    int sleepMultiplier = 1;
    int numReplicated = 0;
    // Peers are selected once, the first time we get here while running.
    if (!peersSelected && this.isRunning()) {
        connectToPeers();
        peersSelected = true;
    }
    int numSinks = replicationSinkMgr.getNumSinks();
    if (numSinks == 0) {
        LOG.warn("No replication sinks found, returning without replicating. The source should retry" + " with the same set of edits.");
        return false;
    }
    // minimum of: configured threads, number of 100-waledit batches,
    //  and number of current sinks
    int n = Math.min(Math.min(this.maxThreads, entries.size() / 100 + 1), numSinks);
    List<List<Entry>> entryLists = new ArrayList<>(n);
    if (n == 1) {
        entryLists.add(entries);
    } else {
        for (int i = 0; i < n; i++) {
            entryLists.add(new ArrayList<>(entries.size() / n + 1));
        }
        // now group by region
        for (Entry e : entries) {
            // |hash % n| is always < n, so the absolute value is a valid bucket index
            entryLists.get(Math.abs(Bytes.hashCode(e.getKey().getEncodedRegionName()) % n)).add(e);
        }
    }
    while (this.isRunning() && !exec.isShutdown()) {
        if (!isPeerEnabled()) {
            if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
                sleepMultiplier++;
            }
            continue;
        }
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Replicating " + entries.size() + " entries of total size " + replicateContext.getSize());
            }
            int futures = 0;
            for (int i = 0; i < entryLists.size(); i++) {
                // Batches already shipped in a previous iteration were replaced by empty lists.
                if (!entryLists.get(i).isEmpty()) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Submitting " + entryLists.get(i).size() + " entries of total size " + replicateContext.getSize());
                    }
                    // RuntimeExceptions encountered here bubble up and are handled in ReplicationSource
                    pool.submit(createReplicator(entryLists.get(i), i));
                    futures++;
                }
            }
            IOException iox = null;
            for (int i = 0; i < futures; i++) {
                try {
                    // wait for all futures, remove successful parts
                    // (only the remaining parts will be retried)
                    Future<Integer> f = pool.take();
                    int index = f.get().intValue();
                    int batchSize = entryLists.get(index).size();
                    entryLists.set(index, Collections.<Entry>emptyList());
                    // Now, we have marked the batch as done replicating, record its size
                    numReplicated += batchSize;
                } catch (InterruptedException ie) {
                    iox = new IOException(ie);
                } catch (ExecutionException ee) {
                    // The cause is expected to be an IOException, but a blind cast would turn any
                    // other failure into a ClassCastException and hide the real problem.
                    Throwable cause = ee.getCause();
                    if (cause instanceof IOException) {
                        iox = (IOException) cause;
                    } else if (cause instanceof RuntimeException) {
                        // keep bubbling up unchecked failures, but with the original exception
                        throw (RuntimeException) cause;
                    } else if (cause instanceof Error) {
                        throw (Error) cause;
                    } else {
                        iox = new IOException(cause);
                    }
                }
            }
            if (iox != null) {
                // if we had any exceptions, try again
                throw iox;
            }
            if (numReplicated != entries.size()) {
                // Something went wrong here and we don't know what, let's just fail and retry.
                LOG.warn("The number of edits replicated is different from the number received," + " failing for now.");
                return false;
            }
            // update metrics
            this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime(), walGroupId);
            return true;
        } catch (IOException ioe) {
            // Didn't ship anything, but must still age the last time we did
            this.metrics.refreshAgeOfLastShippedOp(walGroupId);
            if (ioe instanceof RemoteException) {
                ioe = ((RemoteException) ioe).unwrapRemoteException();
                LOG.warn("Can't replicate because of an error on the remote cluster: ", ioe);
                if (ioe instanceof TableNotFoundException) {
                    if (sleepForRetries("A table is missing in the peer cluster. " + "Replication cannot proceed without losing data.", sleepMultiplier)) {
                        sleepMultiplier++;
                    }
                } else if (ioe instanceof SaslException) {
                    LOG.warn("Peer encountered SaslException, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                }
            } else {
                if (ioe instanceof SocketTimeoutException) {
                    // This exception means we waited for more than 60s and nothing
                    // happened, the cluster is alive and calling it right away
                    // even for a test just makes things worse.
                    sleepForRetries("Encountered a SocketTimeoutException. Since the " + "call to the remote cluster timed out, which is usually " + "caused by a machine failure or a massive slowdown", this.socketTimeoutMultiplier);
                } else if (ioe instanceof ConnectException) {
                    LOG.warn("Peer is unavailable, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                } else {
                    LOG.warn("Can't replicate because of a local or network error: ", ioe);
                }
            }
            // Sleep before the next attempt, whatever the failure was.
            if (sleepForRetries("Since we are unable to replicate", sleepMultiplier)) {
                sleepMultiplier++;
            }
        }
    }
    // in case we exited before replicating
    return false;
}
Also used : ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) SaslException(javax.security.sasl.SaslException) HBaseReplicationEndpoint(org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) Entry(org.apache.hadoop.hbase.wal.WAL.Entry) SocketTimeoutException(java.net.SocketTimeoutException) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) RemoteException(org.apache.hadoop.ipc.RemoteException) ConnectException(java.net.ConnectException)

Example 27 with SocketTimeoutException

use of java.net.SocketTimeoutException in project hbase by apache.

the class BlockingRpcConnection method processResponseForConnectionHeader.

/**
 * Reads the connection header response from the input stream, if one is still awaited, and
 * negotiates the crypto cipher when the response carries cipher metadata.
 *
 * @throws IOException if reading or parsing the response fails, including a timeout while
 *           waiting for the response (the {@link SocketTimeoutException} is kept as the cause)
 */
private void processResponseForConnectionHeader() throws IOException {
    // if no response expected, return
    if (!waitingConnectionHeaderResponse) {
        return;
    }
    try {
        // read the ConnectionHeaderResponse from server
        int len = this.in.readInt();
        byte[] buff = new byte[len];
        // read(byte[]) may return after only part of the message has arrived; readFully blocks
        // until all len bytes are in the buffer (or fails), so the parser never sees a
        // truncated response.
        this.in.readFully(buff);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Length of response for connection header:" + len);
        }
        RPCProtos.ConnectionHeaderResponse connectionHeaderResponse = RPCProtos.ConnectionHeaderResponse.parseFrom(buff);
        // Get the CryptoCipherMeta, update the HBaseSaslRpcClient for Crypto Cipher
        if (connectionHeaderResponse.hasCryptoCipherMeta()) {
            negotiateCryptoAes(connectionHeaderResponse.getCryptoCipherMeta());
        }
        waitingConnectionHeaderResponse = false;
    } catch (SocketTimeoutException ste) {
        LOG.fatal("Can't get the connection header response for rpc timeout, please check if" + " server has the correct configuration to support the additional function.", ste);
        // timeout when waiting the connection header response, ignore the additional function
        throw new IOException("Timeout while waiting connection header response", ste);
    }
}
Also used : SocketTimeoutException(java.net.SocketTimeoutException) RPCProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)

Example 28 with SocketTimeoutException

use of java.net.SocketTimeoutException in project hbase by apache.

the class RpcRetryingCallerImpl method callWithRetries.

/**
 * Runs the callable, retrying after a pause whenever an attempt fails, until it succeeds,
 * the attempts are exhausted, the overall call timeout would be exceeded, or the caller is
 * cancelled.
 *
 * @param callable the operation to run; it is re-prepared before every attempt
 * @param callTimeout upper bound compared against the projected call duration before
 *          sleeping for the next retry
 * @return the callable's result, or {@code null} if this caller was cancelled while waiting
 *         to retry
 * @throws IOException a {@link RetriesExhaustedException} when the last allowed attempt
 *           failed, a {@link SocketTimeoutException} when the planned sleep would exceed
 *           {@code callTimeout}, or an {@link InterruptedIOException} when interrupted while
 *           waiting between attempts
 */
@Override
public T callWithRetries(RetryingCallable<T> callable, int callTimeout) throws IOException, RuntimeException {
    List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions = new ArrayList<>();
    tracker.start();
    context.clear();
    for (int tries = 0; ; tries++) {
        long expectedSleep;
        try {
            // bad cache entries are cleared in the call to RetryingCallable#throwable() in catch block
            callable.prepare(tries != 0);
            interceptor.intercept(context.prepare(callable, tries));
            return callable.call(getTimeout(callTimeout));
        } catch (PreemptiveFastFailException e) {
            throw e;
        } catch (Throwable t) {
            ExceptionUtil.rethrowIfInterrupt(t);
            // translateException throws exception when should not retry: i.e. when request is bad.
            interceptor.handleFailure(context, t);
            t = translateException(t);
            if (tries > startLogErrorsCnt) {
                LOG.info("Call exception, tries=" + tries + ", maxAttempts=" + maxAttempts + ", started=" + (EnvironmentEdgeManager.currentTime() - tracker.getStartTime()) + " ms ago, " + "cancelled=" + cancelled.get() + ", msg=" + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail());
            }
            callable.throwable(t, maxAttempts != 1);
            RetriesExhaustedException.ThrowableWithExtraContext qt = new RetriesExhaustedException.ThrowableWithExtraContext(t, EnvironmentEdgeManager.currentTime(), toString());
            exceptions.add(qt);
            if (tries >= maxAttempts - 1) {
                throw new RetriesExhaustedException(tries, exceptions);
            }
            // If the server is dead, we need to wait a little before retrying, to give
            // a chance to the regions to be moved
            // get right pause time, start by RETRY_BACKOFF[0] * pauseBase, where pauseBase might be
            // special when encountering CallQueueTooBigException, see #HBASE-17114
            long pauseBase = (t instanceof CallQueueTooBigException) ? pauseForCQTBE : pause;
            expectedSleep = callable.sleep(pauseBase, tries);
            // If, after the planned sleep, there won't be enough time left, we stop now.
            long duration = singleCallDuration(expectedSleep);
            if (duration > callTimeout) {
                String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration + ": " + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail();
                throw (SocketTimeoutException) (new SocketTimeoutException(msg).initCause(t));
            }
        } finally {
            interceptor.updateFailureInfo(context);
        }
        try {
            if (expectedSleep > 0) {
                // Waiting on the cancellation flag (rather than sleeping) lets whoever notifies
                // it cut the pause short.
                synchronized (cancelled) {
                    if (cancelled.get()) {
                        return null;
                    }
                    cancelled.wait(expectedSleep);
                }
            }
            if (cancelled.get()) {
                return null;
            }
        } catch (InterruptedException e) {
            // InterruptedIOException has no cause-taking constructor, so attach the original
            // interruption through initCause to keep it in the stack trace.
            throw (InterruptedIOException) new InterruptedIOException("Interrupted after " + tries + " tries while maxAttempts=" + maxAttempts).initCause(e);
        }
    }
}
Also used : InterruptedIOException(java.io.InterruptedIOException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) ArrayList(java.util.ArrayList) PreemptiveFastFailException(org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException) SocketTimeoutException(java.net.SocketTimeoutException)

Example 29 with SocketTimeoutException

use of java.net.SocketTimeoutException in project hbase by apache.

the class TestClientOperationInterrupt method testInterrupt50Percent.

/**
 * Starts {@code nbThread} threads that each perform one get, interrupts the first half of
 * them, and checks that exactly half completed without an exception while none failed with
 * an unexpected exception type. Finally verifies that a get still works afterwards.
 */
@Test
public void testInterrupt50Percent() throws IOException, InterruptedException {
    final AtomicInteger noEx = new AtomicInteger(0);
    final AtomicInteger badEx = new AtomicInteger(0);
    final AtomicInteger noInt = new AtomicInteger(0);
    final AtomicInteger done = new AtomicInteger(0);
    List<Thread> threads = new ArrayList<>();
    final int nbThread = 100;
    for (int i = 0; i < nbThread; i++) {
        Thread t = new Thread() {

            @Override
            public void run() {
                // try-with-resources so each worker releases its Table whether or not the get
                // succeeds
                try (Table ht = util.getConnection().getTable(tableName)) {
                    ht.get(new Get(row1));
                    noEx.incrementAndGet();
                } catch (IOException e) {
                    LOG.info("exception", e);
                    // Only a plain InterruptedIOException is acceptable; SocketTimeoutException
                    // is a subclass of it and must be counted as a bad exception.
                    if (!(e instanceof InterruptedIOException) || (e instanceof SocketTimeoutException)) {
                        badEx.incrementAndGet();
                    } else {
                        if (Thread.currentThread().isInterrupted()) {
                            noInt.incrementAndGet();
                            LOG.info("The thread should NOT be with the 'interrupt' status.");
                        }
                    }
                } finally {
                    done.incrementAndGet();
                }
            }
        };
        t.setName("TestClientOperationInterrupt #" + i);
        threads.add(t);
        t.start();
    }
    // Interrupt the first half of the workers.
    for (int i = 0; i < nbThread / 2; i++) {
        threads.get(i).interrupt();
    }
    // Poll until every worker thread has terminated.
    boolean stillAlive = true;
    while (stillAlive) {
        stillAlive = false;
        for (Thread t : threads) {
            if (t.isAlive()) {
                stillAlive = true;
            }
        }
        Threads.sleep(10);
    }
    Assert.assertFalse(Thread.currentThread().isInterrupted());
    Assert.assertTrue(" noEx: " + noEx.get() + ", badEx=" + badEx.get() + ", noInt=" + noInt.get(), noEx.get() == nbThread / 2 && badEx.get() == 0);
    // The problem here is that we need the server to free its handlers to handle all operations
    while (done.get() != nbThread) {
        Thread.sleep(1);
    }
    try (Table ht = util.getConnection().getTable(tableName)) {
        Result r = ht.get(new Get(row1));
        Assert.assertFalse(r.isEmpty());
    }
}
Also used : InterruptedIOException(java.io.InterruptedIOException) SocketTimeoutException(java.net.SocketTimeoutException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) Test(org.junit.Test)

Example 30 with SocketTimeoutException

use of java.net.SocketTimeoutException in project hbase by apache.

the class TestHCM method testGetOperationTimeout.

/**
   * Test that an operation can fail if we hit the global operation timeout, even if the
   * individual timeout is fine. We do that with:
   * - client side: an operation timeout of 30 seconds
   * - server side: we sleep 20 seconds at each attempt. The first attempt fails, the second
   * one succeeds. But the client won't wait that long, because 20 + 20 > 30, so the client
   * has already timed out when the server answers.
   */
@Test
public void testGetOperationTimeout() throws Exception {
    HTableDescriptor hdt = TEST_UTIL.createTableDescriptor(TableName.valueOf(name.getMethodName()));
    hdt.addCoprocessor(SleepAndFailFirstTime.class.getName());
    Table table = TEST_UTIL.createTable(hdt, new byte[][] { FAM_NAM }, TEST_UTIL.getConfiguration());
    // The outer try/finally closes the table even when the setup calls or the first get
    // throw, not only when the second get is reached.
    try {
        table.setRpcTimeout(Integer.MAX_VALUE);
        SleepAndFailFirstTime.ct.set(0);
        // Check that it works if the timeout is big enough
        table.setOperationTimeout(120 * 1000);
        table.get(new Get(FAM_NAM));
        // Resetting and retrying. Will fail this time, not enough time for the second try
        SleepAndFailFirstTime.ct.set(0);
        try {
            table.setOperationTimeout(30 * 1000);
            table.get(new Get(FAM_NAM));
            Assert.fail("We expect an exception here");
        } catch (SocketTimeoutException e) {
            // The client has a CallTimeout class, but it's not shared. We're not very clean today,
            //  in the general case you can expect the call to stop, but the exception may vary.
            // In this test however, we're sure that it will be a socket timeout.
            LOG.info("We received an exception, as expected ", e);
        } catch (IOException e) {
            Assert.fail("Wrong exception:" + e.getMessage());
        }
    } finally {
        table.close();
    }
}
Also used : SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Test(org.junit.Test)

Aggregations

SocketTimeoutException (java.net.SocketTimeoutException)369 IOException (java.io.IOException)200 Test (org.junit.Test)91 Socket (java.net.Socket)52 SocketException (java.net.SocketException)46 InputStream (java.io.InputStream)43 ServerSocket (java.net.ServerSocket)42 InetSocketAddress (java.net.InetSocketAddress)38 ConnectException (java.net.ConnectException)34 UnknownHostException (java.net.UnknownHostException)31 OutputStream (java.io.OutputStream)27 MalformedURLException (java.net.MalformedURLException)27 URL (java.net.URL)27 DatagramPacket (java.net.DatagramPacket)25 HttpURLConnection (java.net.HttpURLConnection)23 HashMap (java.util.HashMap)21 File (java.io.File)20 ArrayList (java.util.ArrayList)20 InterruptedIOException (java.io.InterruptedIOException)19 BufferedInputStream (java.io.BufferedInputStream)18