Example 51 with RemoteException

Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

From the class AssignmentManager, the method assign:

/**
   * Caller must hold the lock on the passed <code>state</code> object.
   * @param state the region state to act on
   * @param forceNewPlan if true, force creation of a new region plan
   */
private void assign(RegionState state, boolean forceNewPlan) {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        Configuration conf = server.getConfiguration();
        RegionPlan plan = null;
        long maxWaitTime = -1;
        HRegionInfo region = state.getRegion();
        Throwable previousException = null;
        for (int i = 1; i <= maximumAttempts; i++) {
            if (server.isStopped() || server.isAborted()) {
                LOG.info("Skip assigning " + region.getRegionNameAsString() + ", the server is stopped/aborted");
                return;
            }
            if (plan == null) {
                // Get a server for the region at first
                try {
                    plan = getRegionPlan(region, forceNewPlan);
                } catch (HBaseIOException e) {
                    LOG.warn("Failed to get region plan", e);
                }
            }
            if (plan == null) {
                LOG.warn("Unable to determine a plan to assign " + region);
                // For the meta region, we have to keep retrying until we succeed
                if (region.isMetaRegion()) {
                    if (i == maximumAttempts) {
                        // re-set attempt count to 0 for at least 1 retry
                        i = 0;
                        LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region + " after maximumAttempts (" + this.maximumAttempts + "). Reset attempts count and continue retrying.");
                    }
                    waitForRetryingMetaAssignment();
                    continue;
                }
                regionStates.updateRegionState(region, State.FAILED_OPEN);
                return;
            }
            LOG.info("Assigning " + region.getRegionNameAsString() + " to " + plan.getDestination());
            // Transition RegionState to PENDING_OPEN
            regionStates.updateRegionState(region, State.PENDING_OPEN, plan.getDestination());
            boolean needNewPlan = false;
            final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() + " to " + plan.getDestination();
            try {
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes);
                // we're done
                return;
            } catch (Throwable t) {
                if (t instanceof RemoteException) {
                    t = ((RemoteException) t).unwrapRemoteException();
                }
                previousException = t;
                // Should we wait a little before retrying? Yes, if the server is still starting.
                boolean hold = (t instanceof ServerNotRunningYetException);
                // In case socket is timed out and the region server is still online,
                // the openRegion RPC could have been accepted by the server and
                // just the response didn't go through.  So we will retry to
                // open the region on the same server.
                boolean retry = !hold && (t instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(plan.getDestination()));
                if (hold) {
                    LOG.warn(assignMsg + ", waiting a little before trying on the same region server " + "try=" + i + " of " + this.maximumAttempts, t);
                    if (maxWaitTime < 0) {
                        maxWaitTime = EnvironmentEdgeManager.currentTime() + this.server.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                    }
                    try {
                        long now = EnvironmentEdgeManager.currentTime();
                        if (now < maxWaitTime) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", t);
                            }
                            Thread.sleep(100);
                            // reset the try count
                            i--;
                        } else {
                            LOG.debug("Server is not up for a while; try a new one", t);
                            needNewPlan = true;
                        }
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
                        Thread.currentThread().interrupt();
                        return;
                    }
                } else if (retry) {
                    // we want to retry as many times as needed as long as the RS is not dead.
                    i--;
                    if (LOG.isDebugEnabled()) {
                    LOG.debug(assignMsg + ", trying to assign to the same region server because of a socket timeout", t);
                    }
                } else {
                    needNewPlan = true;
                    LOG.warn(assignMsg + ", trying to assign elsewhere instead;" + " try=" + i + " of " + this.maximumAttempts, t);
                }
            }
            if (i == this.maximumAttempts) {
                // For the meta region, we have to keep retrying until we succeed
                if (region.isMetaRegion()) {
                    // re-set attempt count to 0 for at least 1 retry
                    i = 0;
                    LOG.warn(assignMsg + ", trying to assign a hbase:meta region reached to maximumAttempts (" + this.maximumAttempts + ").  Reset attempt counts and continue retrying.");
                    waitForRetryingMetaAssignment();
                } else {
                    // This is the last try.
                    continue;
                }
            }
            // Compute a new plan if needed; otherwise we will retry on the same region server.
            if (needNewPlan) {
                // Force a new plan and reassign. Will return null if no servers.
                // The new plan could be the same as the existing plan since we don't
                // exclude the server of the original plan, which should not be
                // excluded since it could be the only server up now.
                RegionPlan newPlan = null;
                try {
                    newPlan = getRegionPlan(region, true);
                } catch (HBaseIOException e) {
                    LOG.warn("Failed to get region plan", e);
                }
                if (newPlan == null) {
                    regionStates.updateRegionState(region, State.FAILED_OPEN);
                    LOG.warn("Unable to find a viable location to assign region " + region.getRegionNameAsString());
                    return;
                }
                if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
                    // Clean out the plan we failed to execute and any plan that doesn't look
                    // like it'll succeed anyway; we need a new plan!
                    // Transition back to OFFLINE
                    regionStates.updateRegionState(region, State.OFFLINE);
                    plan = newPlan;
                } else if (plan.getDestination().equals(newPlan.getDestination()) && previousException instanceof FailedServerException) {
                    try {
                        LOG.info("Trying to re-assign " + region.getRegionNameAsString() + " to the same failed server.");
                        Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            }
        }
        // Run out of attempts
        regionStates.updateRegionState(region, State.FAILED_OPEN);
    } finally {
        metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), HBaseIOException (org.apache.hadoop.hbase.HBaseIOException), FailedServerException (org.apache.hadoop.hbase.ipc.FailedServerException), ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), ServerName (org.apache.hadoop.hbase.ServerName), RemoteException (org.apache.hadoop.ipc.RemoteException)
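
The decision logic in the catch block above is the part worth isolating: a RemoteException carries the class name of the server-side exception, and unwrapping it lets the real type drive the retry decision. Below is a minimal sketch of that unwrap-and-classify idiom; RemoteException and ServerNotRunningYetException are the real classes used above, while the classify() method itself is an illustrative reduction, not HBase API:

import java.net.SocketTimeoutException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.ipc.RemoteException;

public class RetryClassifier {

    enum Action { WAIT_ON_SAME_SERVER, RETRY_SAME_SERVER, TRY_NEW_SERVER }

    static Action classify(Throwable t, boolean serverStillOnline) {
        if (t instanceof RemoteException) {
            // Swap the RPC wrapper for the server-side exception so the
            // real class drives the decision below.
            t = ((RemoteException) t).unwrapRemoteException();
        }
        if (t instanceof ServerNotRunningYetException) {
            // The region server is still starting; hold and retry there.
            return Action.WAIT_ON_SAME_SERVER;
        }
        if (t instanceof SocketTimeoutException && serverStillOnline) {
            // The open request may have been accepted and only the response
            // lost, so retry against the same server.
            return Action.RETRY_SAME_SERVER;
        }
        // Anything else: force a new region plan and assign elsewhere.
        return Action.TRY_NEW_SERVER;
    }
}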

Example 52 with RemoteException

Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

From the class MasterFileSystem, the method bootstrap:

private static void bootstrap(final Path rd, final Configuration c) throws IOException {
    LOG.info("BOOTSTRAP: creating hbase:meta region");
    try {
        // Bootstrapping, so make sure the block cache is off; otherwise one will be
        // created here during bootstrap and will need to be cleaned up. Better not to
        // create it in the first place: turn off block caching for bootstrap and
        // enable it afterwards.
        HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
        HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
        setInfoFamilyCachingForMeta(metaDescriptor, false);
        HRegion meta = HRegion.createHRegion(metaHRI, rd, c, metaDescriptor, null);
        setInfoFamilyCachingForMeta(metaDescriptor, true);
        meta.close();
    } catch (IOException e) {
        e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e;
        LOG.error("bootstrap", e);
        throw e;
    }
}
Also used: HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), HRegion (org.apache.hadoop.hbase.regionserver.HRegion), FSTableDescriptors (org.apache.hadoop.hbase.util.FSTableDescriptors), IOException (java.io.IOException), RemoteException (org.apache.hadoop.ipc.RemoteException), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)
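
The catch block above is an instance of a common unwrap-then-rethrow idiom: before logging and propagating an IOException from a filesystem call, swap in the server-side cause if the exception is a RemoteException. A minimal sketch, assuming a hypothetical createRegionDir() stand-in for the real HDFS operation:

import java.io.IOException;
import org.apache.hadoop.ipc.RemoteException;

public class UnwrapRethrow {

    static void bootstrapStep() throws IOException {
        try {
            createRegionDir();
        } catch (IOException e) {
            // Unwrap so callers and logs see the real server-side class
            // (e.g. an AccessControlException) instead of the RPC wrapper.
            e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e;
            throw e;
        }
    }

    // Hypothetical stand-in for the real HDFS operation.
    private static void createRegionDir() throws IOException {
    }
}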

Example 53 with RemoteException

Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

From the class BlockingRpcConnection, the method readResponse:

/*
   * Receive a response. There is only one receiver thread, so no synchronization on 'in' is needed.
   */
private void readResponse() {
    Call call = null;
    boolean expectedCall = false;
    try {
        // See HBaseServer.Call.setResponse for where we write out the response.
        // Total size of the response. Unused, but it has to be read anyway.
        int totalSize = in.readInt();
        // Read the header
        ResponseHeader responseHeader = ResponseHeader.parseDelimitedFrom(in);
        int id = responseHeader.getCallId();
        // call.done has to be set before leaving this method
        call = calls.remove(id);
        expectedCall = (call != null && !call.isDone());
        if (!expectedCall) {
            // So we got a response for which we have no corresponding 'call' here on the client side.
            // We probably timed out waiting, cleaned up all references, and now the server decides
            // to return a response. There is nothing we can do with the response at this stage. Clean
            // the response out of the wire so it's out of the way and we can get other responses on
            // this connection.
            int readSoFar = getTotalSizeWhenWrittenDelimited(responseHeader);
            int whatIsLeftToRead = totalSize - readSoFar;
            IOUtils.skipFully(in, whatIsLeftToRead);
            if (call != null) {
                call.callStats.setResponseSizeBytes(totalSize);
                call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
            }
            return;
        }
        if (responseHeader.hasException()) {
            ExceptionResponse exceptionResponse = responseHeader.getException();
            RemoteException re = createRemoteException(exceptionResponse);
            call.setException(re);
            call.callStats.setResponseSizeBytes(totalSize);
            call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
            if (isFatalConnectionException(exceptionResponse)) {
                synchronized (this) {
                    closeConn(re);
                }
            }
        } else {
            Message value = null;
            if (call.responseDefaultType != null) {
                Builder builder = call.responseDefaultType.newBuilderForType();
                ProtobufUtil.mergeDelimitedFrom(builder, in);
                value = builder.build();
            }
            CellScanner cellBlockScanner = null;
            if (responseHeader.hasCellBlockMeta()) {
                int size = responseHeader.getCellBlockMeta().getLength();
                byte[] cellBlock = new byte[size];
                IOUtils.readFully(this.in, cellBlock, 0, cellBlock.length);
                cellBlockScanner = this.rpcClient.cellBlockBuilder.createCellScanner(this.codec, this.compressor, cellBlock);
            }
            call.setResponse(value, cellBlockScanner);
            call.callStats.setResponseSizeBytes(totalSize);
            call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
        }
    } catch (IOException e) {
        if (expectedCall) {
            call.setException(e);
        }
        if (e instanceof SocketTimeoutException) {
            // A read timeout here just means no response arrived within the configured
            // rpc timeout; it is not fatal to the connection, so only trace-log it.
            if (LOG.isTraceEnabled()) {
                LOG.trace("ignored", e);
            }
        } else {
            synchronized (this) {
                closeConn(e);
            }
        }
    }
}
Also used: ResponseHeader (org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ResponseHeader), ExceptionResponse (org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ExceptionResponse), SocketTimeoutException (java.net.SocketTimeoutException), Message (org.apache.hadoop.hbase.shaded.com.google.protobuf.Message), Builder (org.apache.hadoop.hbase.shaded.com.google.protobuf.Message.Builder), DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), IPCUtil.createRemoteException (org.apache.hadoop.hbase.ipc.IPCUtil.createRemoteException), RemoteException (org.apache.hadoop.ipc.RemoteException), CellScanner (org.apache.hadoop.hbase.CellScanner)
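
createRemoteException() is an HBase-internal helper that rebuilds a RemoteException from the class name and message carried in the ExceptionResponse. The round trip can be sketched with just the public Hadoop RemoteException API; the class name and message below are made up for illustration:

import java.io.IOException;
import org.apache.hadoop.ipc.RemoteException;

public class RemoteExceptionRoundTrip {
    public static void main(String[] args) {
        // The server conceptually ships only a class name and a message.
        RemoteException re = new RemoteException(
            "java.io.FileNotFoundException", "no such region directory");
        // unwrapRemoteException() reflectively instantiates the named class
        // when it is an IOException subclass with a (String) constructor;
        // otherwise it returns the RemoteException itself.
        IOException unwrapped = re.unwrapRemoteException();
        // Prints: java.io.FileNotFoundException
        System.out.println(unwrapped.getClass().getName());
    }
}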

Example 54 with RemoteException

Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

From the class BlockingRpcConnection, the method handleSaslConnectionFailure:

/**
   * If multiple clients with the same principal try to connect to the same server at the same
   * time, the server assumes a replay attack is in progress. This is a feature of Kerberos. To
   * work around it, the client backs off for a random interval and tries to initiate the
   * connection again. The other problem concerns ticket expiry; to handle that, a relogin is
   * attempted.
   * <p>
   * The retry logic is governed by the {@link #shouldAuthenticateOverKrb} method. When the user
   * doesn't have valid credentials (from the cache or a ticket), there is no need to retry. In
   * such cases it is prudent to throw a runtime exception when we receive a SaslException from
   * the underlying authentication implementation, so that there is no retry from other
   * high-level layers (e.g. HCM or HBaseAdmin).
   * </p>
   */
private void handleSaslConnectionFailure(final int currRetries, final int maxRetries, final Exception ex, final UserGroupInformation user) throws IOException, InterruptedException {
    closeSocket();
    user.doAs(new PrivilegedExceptionAction<Object>() {

        @Override
        public Object run() throws IOException, InterruptedException {
            if (shouldAuthenticateOverKrb()) {
                if (currRetries < maxRetries) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Exception encountered while connecting to " + "the server : " + ex);
                    }
                    // try re-login
                    relogin();
                    disposeSasl();
                    // back off for a random interval, with millisecond granularity
                    // we are sleeping with the Connection lock held but since this
                    // connection instance is being used for connecting to the server
                    // in question, it is okay
                    Thread.sleep(ThreadLocalRandom.current().nextInt(reloginMaxBackoff) + 1);
                    return null;
                } else {
                    String msg = "Couldn't setup connection for " + UserGroupInformation.getLoginUser().getUserName() + " to " + serverPrincipal;
                    LOG.warn(msg, ex);
                    throw (IOException) new IOException(msg).initCause(ex);
                }
            } else {
                LOG.warn("Exception encountered while connecting to " + "the server : " + ex);
            }
            if (ex instanceof RemoteException) {
                throw (RemoteException) ex;
            }
            if (ex instanceof SaslException) {
                String msg = "SASL authentication failed." + " The most likely cause is missing or invalid credentials." + " Consider 'kinit'.";
                LOG.fatal(msg, ex);
                throw new RuntimeException(msg, ex);
            }
            throw new IOException(ex);
        }
    });
}
Also used: DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), IPCUtil.createRemoteException (org.apache.hadoop.hbase.ipc.IPCUtil.createRemoteException), RemoteException (org.apache.hadoop.ipc.RemoteException), SaslException (javax.security.sasl.SaslException)
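
The essential move above is the randomized backoff before the next connection attempt, which spreads out clients sharing a principal so the server's replay-attack heuristic is not triggered. A minimal sketch of that retry loop; connect(), relogin(), and the backoff bound are assumed stand-ins, not the HBase API:

import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;

public class SaslRetrySketch {

    // Assumed upper bound on the backoff, in milliseconds.
    static final int RELOGIN_MAX_BACKOFF_MS = 5000;

    static void connectWithRetries(int maxRetries) throws IOException, InterruptedException {
        for (int attempt = 0; ; attempt++) {
            try {
                connect();
                return;
            } catch (IOException ex) {
                if (attempt >= maxRetries) {
                    throw new IOException("couldn't set up connection", ex);
                }
                relogin();
                // Back off for a random interval (at least 1 ms) so clients
                // sharing a principal do not look like a replay attack.
                Thread.sleep(ThreadLocalRandom.current().nextInt(RELOGIN_MAX_BACKOFF_MS) + 1);
            }
        }
    }

    // Hypothetical stand-in for the SASL/Kerberos connection attempt.
    private static void connect() throws IOException {
    }

    // Hypothetical stand-in for refreshing an expired Kerberos ticket.
    private static void relogin() throws IOException {
    }
}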

Example 55 with RemoteException

Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

From the class NettyRpcDuplexHandler, the method readResponse:

private void readResponse(ChannelHandlerContext ctx, ByteBuf buf) throws IOException {
    int totalSize = buf.readInt();
    ByteBufInputStream in = new ByteBufInputStream(buf);
    ResponseHeader responseHeader = ResponseHeader.parseDelimitedFrom(in);
    int id = responseHeader.getCallId();
    if (LOG.isTraceEnabled()) {
        LOG.trace("got response header " + TextFormat.shortDebugString(responseHeader) + ", totalSize: " + totalSize + " bytes");
    }
    RemoteException remoteExc;
    if (responseHeader.hasException()) {
        ExceptionResponse exceptionResponse = responseHeader.getException();
        remoteExc = IPCUtil.createRemoteException(exceptionResponse);
        if (IPCUtil.isFatalConnectionException(exceptionResponse)) {
            // Here we will clean up all calls, so there is no need to fall through; just return.
            exceptionCaught(ctx, remoteExc);
            return;
        }
    } else {
        remoteExc = null;
    }
    Call call = id2Call.remove(id);
    if (call == null) {
        // So we got a response for which we have no corresponding 'call' here on the client side.
        // We probably timed out waiting, cleaned up all references, and now the server decides
        // to return a response. There is nothing we can do with the response at this stage. Clean
        // the response out of the wire so it's out of the way and we can get other responses on
        // this connection.
        int readSoFar = IPCUtil.getTotalSizeWhenWrittenDelimited(responseHeader);
        int whatIsLeftToRead = totalSize - readSoFar;
        if (LOG.isDebugEnabled()) {
            LOG.debug("Unknown callId: " + id + ", skipping over this response of " + whatIsLeftToRead + " bytes");
        }
        return;
    }
    if (remoteExc != null) {
        call.setException(remoteExc);
        return;
    }
    Message value;
    if (call.responseDefaultType != null) {
        Builder builder = call.responseDefaultType.newBuilderForType();
        builder.mergeDelimitedFrom(in);
        value = builder.build();
    } else {
        value = null;
    }
    CellScanner cellBlockScanner;
    if (responseHeader.hasCellBlockMeta()) {
        int size = responseHeader.getCellBlockMeta().getLength();
        // Maybe we could read directly from the ByteBuf.
        // The problem here is that we do not know when to release it.
        byte[] cellBlock = new byte[size];
        buf.readBytes(cellBlock);
        cellBlockScanner = cellBlockBuilder.createCellScanner(this.codec, this.compressor, cellBlock);
    } else {
        cellBlockScanner = null;
    }
    call.setResponse(value, cellBlockScanner);
}
Also used: ResponseHeader (org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ResponseHeader), ExceptionResponse (org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ExceptionResponse), Message (org.apache.hadoop.hbase.shaded.com.google.protobuf.Message), Builder (org.apache.hadoop.hbase.shaded.com.google.protobuf.Message.Builder), ByteBufInputStream (io.netty.buffer.ByteBufInputStream), RemoteException (org.apache.hadoop.ipc.RemoteException), CellScanner (org.apache.hadoop.hbase.CellScanner)
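
Both readResponse() implementations (Examples 53 and 55) rest on the same call-map bookkeeping: responses are matched to pending calls by call id, a response with no matching call is dropped after being skipped on the wire, and a RemoteException fails only the one call unless it is fatal to the connection. A self-contained sketch of that dispatch, with illustrative names throughout:

import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.ipc.RemoteException;

public class CallDispatchSketch {

    private final Map<Integer, CompletableFuture<byte[]>> id2Call = new ConcurrentHashMap<>();

    void onResponse(int callId, byte[] payload, RemoteException remoteExc,
            boolean fatalToConnection) {
        if (remoteExc != null && fatalToConnection) {
            // Connection-level failure: every pending call dies with it.
            id2Call.values().forEach(f -> f.completeExceptionally(remoteExc));
            id2Call.clear();
            return;
        }
        CompletableFuture<byte[]> call = id2Call.remove(callId);
        if (call == null) {
            // Late response for a call that already timed out; drop it.
            return;
        }
        if (remoteExc != null) {
            call.completeExceptionally(remoteExc); // fail only this call
        } else {
            call.complete(payload);
        }
    }
}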

Aggregations

Classes that co-occur with RemoteException (org.apache.hadoop.ipc.RemoteException) across the indexed examples, with usage counts:

RemoteException (org.apache.hadoop.ipc.RemoteException): 99
IOException (java.io.IOException): 53
Test (org.junit.Test): 39
Path (org.apache.hadoop.fs.Path): 36
Configuration (org.apache.hadoop.conf.Configuration): 20
FileNotFoundException (java.io.FileNotFoundException): 19
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 13
FileSystem (org.apache.hadoop.fs.FileSystem): 12
InterruptedIOException (java.io.InterruptedIOException): 10
AccessControlException (org.apache.hadoop.security.AccessControlException): 10
ServerName (org.apache.hadoop.hbase.ServerName): 9
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 8
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 8
FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException): 7
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 7
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 7
EOFException (java.io.EOFException): 6
ArrayList (java.util.ArrayList): 6
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 6
HBaseIOException (org.apache.hadoop.hbase.HBaseIOException): 6