use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
the class AssignmentManager method assign.
/**
 * Caller must hold lock on the passed <code>state</code> object.
 * @param state the region state to assign; the caller must hold its lock
 * @param forceNewPlan whether to force creation of a new assignment plan
 */
private void assign(RegionState state, boolean forceNewPlan) {
  long startTime = EnvironmentEdgeManager.currentTime();
  try {
    Configuration conf = server.getConfiguration();
    RegionPlan plan = null;
    long maxWaitTime = -1;
    HRegionInfo region = state.getRegion();
    Throwable previousException = null;
    for (int i = 1; i <= maximumAttempts; i++) {
      if (server.isStopped() || server.isAborted()) {
        LOG.info("Skip assigning " + region.getRegionNameAsString() + ", the server is stopped/aborted");
        return;
      }
      if (plan == null) {
        // Get a server for the region first
        try {
          plan = getRegionPlan(region, forceNewPlan);
        } catch (HBaseIOException e) {
          LOG.warn("Failed to get region plan", e);
        }
      }
      if (plan == null) {
        LOG.warn("Unable to determine a plan to assign " + region);
        // For the meta region, we have to keep retrying until we succeed
        if (region.isMetaRegion()) {
          if (i == maximumAttempts) {
            // reset the attempt count to 0 for at least 1 more retry
            i = 0;
            LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region
              + " after maximumAttempts (" + this.maximumAttempts
              + "). Reset attempt count and continue retrying.");
          }
          waitForRetryingMetaAssignment();
          continue;
        }
        regionStates.updateRegionState(region, State.FAILED_OPEN);
        return;
      }
      LOG.info("Assigning " + region.getRegionNameAsString() + " to " + plan.getDestination());
      // Transition RegionState to PENDING_OPEN
      regionStates.updateRegionState(region, State.PENDING_OPEN, plan.getDestination());
      boolean needNewPlan = false;
      final String assignMsg = "Failed assignment of " + region.getRegionNameAsString()
        + " to " + plan.getDestination();
      try {
        List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
        if (shouldAssignFavoredNodes(region)) {
          favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
        }
        serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes);
        // we're done
        return;
      } catch (Throwable t) {
        if (t instanceof RemoteException) {
          t = ((RemoteException) t).unwrapRemoteException();
        }
        previousException = t;
        // Should we wait a little before retrying? Yes, if the server is starting.
        boolean hold = (t instanceof ServerNotRunningYetException);
        // In case the socket timed out and the region server is still online,
        // the openRegion RPC could have been accepted by the server and
        // just the response didn't go through. So we will retry to
        // open the region on the same server.
        boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
          && this.serverManager.isServerOnline(plan.getDestination()));
        if (hold) {
          LOG.warn(assignMsg + ", waiting a little before trying on the same region server "
            + "try=" + i + " of " + this.maximumAttempts, t);
          if (maxWaitTime < 0) {
            maxWaitTime = EnvironmentEdgeManager.currentTime()
              + this.server.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
          }
          try {
            long now = EnvironmentEdgeManager.currentTime();
            if (now < maxWaitTime) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", t);
              }
              Thread.sleep(100);
              // reset the try count
              i--;
            } else {
              LOG.debug("Server is not up for a while; try a new one", t);
              needNewPlan = true;
            }
          } catch (InterruptedException ie) {
            LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
            regionStates.updateRegionState(region, State.FAILED_OPEN);
            Thread.currentThread().interrupt();
            return;
          }
        } else if (retry) {
          // we want to retry as many times as needed as long as the RS is not dead.
          i--;
          if (LOG.isDebugEnabled()) {
            LOG.debug(assignMsg + ", trying to assign to the same region server", t);
          }
        } else {
          needNewPlan = true;
          LOG.warn(assignMsg + ", trying to assign elsewhere instead;"
            + " try=" + i + " of " + this.maximumAttempts, t);
        }
      }
      if (i == this.maximumAttempts) {
        // For the meta region, we have to keep retrying until we succeed
        if (region.isMetaRegion()) {
          // reset the attempt count to 0 for at least 1 more retry
          i = 0;
          LOG.warn(assignMsg + ", assigning the hbase:meta region has reached maximumAttempts ("
            + this.maximumAttempts + "). Reset attempt count and continue retrying.");
          waitForRetryingMetaAssignment();
        } else {
          // This is the last try.
          continue;
        }
      }
      // If the region opened on the destination of the present plan, reassigning to a
      // new RS may cause double assignments; in that case we reassign to the same RS.
      if (needNewPlan) {
        // Force a new plan and reassign. Will return null if no servers.
        // The new plan could be the same as the existing plan since we don't
        // exclude the server of the original plan, which should not be
        // excluded since it could be the only server up now.
        RegionPlan newPlan = null;
        try {
          newPlan = getRegionPlan(region, true);
        } catch (HBaseIOException e) {
          LOG.warn("Failed to get region plan", e);
        }
        if (newPlan == null) {
          regionStates.updateRegionState(region, State.FAILED_OPEN);
          LOG.warn("Unable to find a viable location to assign region "
            + region.getRegionNameAsString());
          return;
        }
        if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
          // Clean out the plan we failed to execute and one that doesn't look
          // like it'll succeed anyway; we need a new plan!
          // Transition back to OFFLINE
          regionStates.updateRegionState(region, State.OFFLINE);
          plan = newPlan;
        } else if (plan.getDestination().equals(newPlan.getDestination())
            && previousException instanceof FailedServerException) {
          try {
            LOG.info("Trying to re-assign " + region.getRegionNameAsString()
              + " to the same failed server.");
            Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
              RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
          } catch (InterruptedException ie) {
            LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
            regionStates.updateRegionState(region, State.FAILED_OPEN);
            Thread.currentThread().interrupt();
            return;
          }
        }
      }
    }
    // Ran out of attempts
    regionStates.updateRegionState(region, State.FAILED_OPEN);
  } finally {
    metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime);
  }
}
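The key pattern above is the first thing done in the catch block: a RemoteException is a client-side wrapper that carries only the class name and message of the exception thrown on the server, and unwrapRemoteException() re-instantiates that class locally so the instanceof checks that follow (ServerNotRunningYetException, SocketTimeoutException, FailedServerException) see the real type. A minimal, self-contained sketch of that idiom (illustrative code, not from HBase):

import java.io.IOException;
import org.apache.hadoop.ipc.RemoteException;

public class UnwrapExample {
  public static void main(String[] args) {
    // A RemoteException carries only the server-side class name and message.
    RemoteException re = new RemoteException(
        IOException.class.getName(), "disk full on region server");
    // unwrapRemoteException() reflectively rebuilds the named exception locally,
    // falling back to returning the RemoteException itself if it cannot.
    IOException unwrapped = re.unwrapRemoteException();
    System.out.println(unwrapped); // java.io.IOException: disk full on region server
  }
}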
use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
the class MasterFileSystem method bootstrap.
private static void bootstrap(final Path rd, final Configuration c) throws IOException {
  LOG.info("BOOTSTRAP: creating hbase:meta region");
  try {
    // Bootstrapping, make sure blockcache is off. Else, one will be
    // created here in bootstrap and it'll need to be cleaned up. Better to
    // not make it in the first place. Turn off block caching for bootstrap.
    // Enable after.
    HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
    HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
    setInfoFamilyCachingForMeta(metaDescriptor, false);
    HRegion meta = HRegion.createHRegion(metaHRI, rd, c, metaDescriptor, null);
    setInfoFamilyCachingForMeta(metaDescriptor, true);
    meta.close();
  } catch (IOException e) {
    e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e;
    LOG.error("bootstrap", e);
    throw e;
  }
}
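bootstrap uses the same unwrap-or-rethrow idiom in its catch block. RemoteException also offers a guarded variant, unwrapRemoteException(Class...), which only unwraps when the wrapped class is in an allow-list. A short sketch (illustrative, not HBase code):

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import org.apache.hadoop.ipc.RemoteException;

public class GuardedUnwrap {
  public static void main(String[] args) {
    RemoteException re = new RemoteException(
        FileNotFoundException.class.getName(), "no such region directory");
    // The wrapped class name matches the lookup type, so the real exception is rebuilt.
    IOException e = re.unwrapRemoteException(FileNotFoundException.class);
    System.out.println(e instanceof FileNotFoundException); // true
    // No match: the RemoteException itself is returned unchanged.
    IOException still = re.unwrapRemoteException(InterruptedIOException.class);
    System.out.println(still == re); // true
  }
}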
use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
the class BlockingRpcConnection method readResponse.
/*
 * Receive a response. Because there is only one receiver, no synchronization on "in" is needed.
 */
private void readResponse() {
  Call call = null;
  boolean expectedCall = false;
  try {
    // See HBaseServer.Call.setResponse for where we write out the response.
    // Total size of the response. Unused. But have to read it in anyways.
    int totalSize = in.readInt();
    // Read the header
    ResponseHeader responseHeader = ResponseHeader.parseDelimitedFrom(in);
    int id = responseHeader.getCallId();
    // call.done has to be set before leaving this method
    call = calls.remove(id);
    expectedCall = (call != null && !call.isDone());
    if (!expectedCall) {
      // So we got a response for which we have no corresponding 'call' here on the client-side.
      // We probably timed out waiting, cleaned up all references, and now the server decides
      // to return a response. There is nothing we can do w/ the response at this stage. Clean
      // out the wire of the response so it's out of the way and we can get other responses on
      // this connection.
      int readSoFar = getTotalSizeWhenWrittenDelimited(responseHeader);
      int whatIsLeftToRead = totalSize - readSoFar;
      IOUtils.skipFully(in, whatIsLeftToRead);
      if (call != null) {
        call.callStats.setResponseSizeBytes(totalSize);
        call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
      }
      return;
    }
    if (responseHeader.hasException()) {
      ExceptionResponse exceptionResponse = responseHeader.getException();
      RemoteException re = createRemoteException(exceptionResponse);
      call.setException(re);
      call.callStats.setResponseSizeBytes(totalSize);
      call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
      if (isFatalConnectionException(exceptionResponse)) {
        synchronized (this) {
          closeConn(re);
        }
      }
    } else {
      Message value = null;
      if (call.responseDefaultType != null) {
        Builder builder = call.responseDefaultType.newBuilderForType();
        ProtobufUtil.mergeDelimitedFrom(builder, in);
        value = builder.build();
      }
      CellScanner cellBlockScanner = null;
      if (responseHeader.hasCellBlockMeta()) {
        int size = responseHeader.getCellBlockMeta().getLength();
        byte[] cellBlock = new byte[size];
        IOUtils.readFully(this.in, cellBlock, 0, cellBlock.length);
        cellBlockScanner = this.rpcClient.cellBlockBuilder.createCellScanner(this.codec, this.compressor, cellBlock);
      }
      call.setResponse(value, cellBlockScanner);
      call.callStats.setResponseSizeBytes(totalSize);
      call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.callStats.getStartTime());
    }
  } catch (IOException e) {
    if (expectedCall) {
      call.setException(e);
    }
    if (e instanceof SocketTimeoutException) {
      // Clean up open calls but don't treat this as a fatal condition, since we
      // expect certain responses to not make it by the specified
      // {@link ConnectionId#rpcTimeout}.
      if (LOG.isTraceEnabled()) {
        LOG.trace("ignored", e);
      }
    } else {
      synchronized (this) {
        closeConn(e);
      }
    }
  }
}
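Because many calls multiplex one connection, an orphaned response (for a call that already timed out client-side) cannot simply be abandoned: its remaining bytes must be drained so the next frame starts at a clean boundary. A stripped-down sketch of that length-prefix arithmetic in plain java.io (the single stand-in header byte and payload are assumptions for illustration):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class SkipOrphanResponse {
  public static void main(String[] args) throws IOException {
    // Build a frame: 4-byte total size, then header, then body.
    byte[] header = new byte[] { 42 };                              // stand-in for the ResponseHeader
    byte[] body = "late reply".getBytes(StandardCharsets.UTF_8);    // stand-in for the response body
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bos);
    out.writeInt(header.length + body.length);
    out.write(header);
    out.write(body);

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
    int totalSize = in.readInt();
    byte[] headerBuf = new byte[1];
    in.readFully(headerBuf);                  // "parse" the header
    int readSoFar = headerBuf.length;
    int whatIsLeftToRead = totalSize - readSoFar;
    in.skipBytes(whatIsLeftToRead);           // drain the orphaned body
    System.out.println("left on wire: " + in.available()); // 0: next frame aligned
  }
}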
use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
the class BlockingRpcConnection method handleSaslConnectionFailure.
/**
 * If multiple clients with the same principal try to connect to the same server at the same time,
 * the server assumes a replay attack is in progress. This is a feature of kerberos. To work
 * around this, the client backs off for a random interval and then tries to initiate the
 * connection again. The other problem is ticket expiry; to handle that, a relogin is attempted.
 * <p>
 * The retry logic is governed by the {@link #shouldAuthenticateOverKrb} method. When the user
 * doesn't have valid credentials (from cache or ticket), there is no point retrying. In such
 * cases, it is prudent to throw a runtime exception when we receive a SaslException from the
 * underlying authentication implementation, so that no retry happens at higher levels (e.g.,
 * HCM or HBaseAdmin).
 * </p>
 */
private void handleSaslConnectionFailure(final int currRetries, final int maxRetries,
    final Exception ex, final UserGroupInformation user) throws IOException, InterruptedException {
  closeSocket();
  user.doAs(new PrivilegedExceptionAction<Object>() {
    @Override
    public Object run() throws IOException, InterruptedException {
      if (shouldAuthenticateOverKrb()) {
        if (currRetries < maxRetries) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Exception encountered while connecting to the server : " + ex);
          }
          // try re-login
          relogin();
          disposeSasl();
          // Sleep for a random number of milliseconds. We are sleeping with the
          // Connection lock held, but since this connection instance is being used
          // for connecting to the server in question, that is okay.
          Thread.sleep(ThreadLocalRandom.current().nextInt(reloginMaxBackoff) + 1);
          return null;
        } else {
          String msg = "Couldn't setup connection for "
            + UserGroupInformation.getLoginUser().getUserName() + " to " + serverPrincipal;
          LOG.warn(msg, ex);
          throw (IOException) new IOException(msg).initCause(ex);
        }
      } else {
        LOG.warn("Exception encountered while connecting to the server : " + ex);
      }
      if (ex instanceof RemoteException) {
        throw (RemoteException) ex;
      }
      if (ex instanceof SaslException) {
        String msg = "SASL authentication failed."
          + " The most likely cause is missing or invalid credentials. Consider 'kinit'.";
        LOG.fatal(msg, ex);
        throw new RuntimeException(msg, ex);
      }
      throw new IOException(ex);
    }
  });
}
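The randomized sleep is the heart of the replay-attack workaround described in the javadoc: clients sharing a principal must de-synchronize before reconnecting. A minimal sketch of that backoff shape (the 5-second bound is an assumption standing in for the reloginMaxBackoff field used above):

import java.util.concurrent.ThreadLocalRandom;

public class SaslBackoff {
  // Assumption for illustration: mirrors the role of reloginMaxBackoff above.
  private static final int RELOGIN_MAX_BACKOFF_MS = 5000;

  static void backoffBeforeRetry() throws InterruptedException {
    // Sleep between 1 ms and RELOGIN_MAX_BACKOFF_MS so concurrent clients with
    // the same principal stop arriving at the server in lockstep.
    Thread.sleep(ThreadLocalRandom.current().nextInt(RELOGIN_MAX_BACKOFF_MS) + 1);
  }

  public static void main(String[] args) throws InterruptedException {
    backoffBeforeRetry();
    System.out.println("retrying connection...");
  }
}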
use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
the class NettyRpcDuplexHandler method readResponse.
private void readResponse(ChannelHandlerContext ctx, ByteBuf buf) throws IOException {
  int totalSize = buf.readInt();
  ByteBufInputStream in = new ByteBufInputStream(buf);
  ResponseHeader responseHeader = ResponseHeader.parseDelimitedFrom(in);
  int id = responseHeader.getCallId();
  if (LOG.isTraceEnabled()) {
    LOG.trace("got response header " + TextFormat.shortDebugString(responseHeader)
      + ", totalSize: " + totalSize + " bytes");
  }
  RemoteException remoteExc;
  if (responseHeader.hasException()) {
    ExceptionResponse exceptionResponse = responseHeader.getException();
    remoteExc = IPCUtil.createRemoteException(exceptionResponse);
    if (IPCUtil.isFatalConnectionException(exceptionResponse)) {
      // exceptionCaught will clean up all calls, so there is no need to fall
      // through; just return.
      exceptionCaught(ctx, remoteExc);
      return;
    }
  } else {
    remoteExc = null;
  }
  Call call = id2Call.remove(id);
  if (call == null) {
    // So we got a response for which we have no corresponding 'call' here on the client-side.
    // We probably timed out waiting, cleaned up all references, and now the server decides
    // to return a response. There is nothing we can do w/ the response at this stage. Clean
    // out the wire of the response so it's out of the way and we can get other responses on
    // this connection.
    int readSoFar = IPCUtil.getTotalSizeWhenWrittenDelimited(responseHeader);
    int whatIsLeftToRead = totalSize - readSoFar;
    if (LOG.isDebugEnabled()) {
      LOG.debug("Unknown callId: " + id + ", skipping over this response of "
        + whatIsLeftToRead + " bytes");
    }
    return;
  }
  if (remoteExc != null) {
    call.setException(remoteExc);
    return;
  }
  Message value;
  if (call.responseDefaultType != null) {
    Builder builder = call.responseDefaultType.newBuilderForType();
    builder.mergeDelimitedFrom(in);
    value = builder.build();
  } else {
    value = null;
  }
  CellScanner cellBlockScanner;
  if (responseHeader.hasCellBlockMeta()) {
    int size = responseHeader.getCellBlockMeta().getLength();
    // Maybe we could read directly from the ByteBuf.
    // The problem here is that we do not know when to release it.
    byte[] cellBlock = new byte[size];
    buf.readBytes(cellBlock);
    cellBlockScanner = cellBlockBuilder.createCellScanner(this.codec, this.compressor, cellBlock);
  } else {
    cellBlockScanner = null;
  }
  call.setResponse(value, cellBlockScanner);
}
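Unlike the blocking client, the Netty handler reads the size prefix straight off the ByteBuf and then wraps the remainder in a ByteBufInputStream so the delimited-protobuf parser can consume it through the familiar InputStream API. A small sketch of that bridge (illustrative payload, not HBase code):

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.Unpooled;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class ByteBufBridge {
  public static void main(String[] args) throws IOException {
    ByteBuf buf = Unpooled.buffer();
    byte[] payload = "payload".getBytes(StandardCharsets.UTF_8);
    buf.writeInt(payload.length); // stand-in for the totalSize prefix
    buf.writeBytes(payload);

    int totalSize = buf.readInt(); // read the prefix directly from the ByteBuf
    // The stream view shares the buffer's reader index, so stream reads advance it.
    ByteBufInputStream in = new ByteBufInputStream(buf);
    byte[] body = new byte[totalSize];
    in.readFully(body);
    System.out.println(new String(body, StandardCharsets.UTF_8)); // prints "payload"
    buf.release(); // the stream wrapper does not release the underlying buffer
  }
}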