use of org.apache.hadoop.hbase.util.RetryCounter in project hbase by apache.
the class RecoverableZooKeeper method getChildren.
/**
* getChildren is an idempotent operation. Retry before throwing exception
* @return List of children znodes
*/
public List<String> getChildren(String path, Watcher watcher) throws KeeperException, InterruptedException {
TraceScope traceScope = null;
try {
traceScope = Trace.startSpan("RecoverableZookeeper.getChildren");
RetryCounter retryCounter = retryCounterFactory.create();
while (true) {
try {
return checkZk().getChildren(path, watcher);
} catch (KeeperException e) {
switch(e.code()) {
case CONNECTIONLOSS:
case OPERATIONTIMEOUT:
retryOrThrow(retryCounter, e, "getChildren");
break;
default:
throw e;
}
}
retryCounter.sleepUntilNextRetry();
}
} finally {
if (traceScope != null)
traceScope.close();
}
}
use of org.apache.hadoop.hbase.util.RetryCounter in project hbase by apache.
the class RecoverableZooKeeper method getData.
/**
* getData is an idempotent operation. Retry before throwing exception
* @return Data
*/
public byte[] getData(String path, Watcher watcher, Stat stat) throws KeeperException, InterruptedException {
TraceScope traceScope = null;
try {
traceScope = Trace.startSpan("RecoverableZookeeper.getData");
RetryCounter retryCounter = retryCounterFactory.create();
while (true) {
try {
byte[] revData = checkZk().getData(path, watcher, stat);
return removeMetaData(revData);
} catch (KeeperException e) {
switch(e.code()) {
case CONNECTIONLOSS:
case OPERATIONTIMEOUT:
retryOrThrow(retryCounter, e, "getData");
break;
default:
throw e;
}
}
retryCounter.sleepUntilNextRetry();
}
} finally {
if (traceScope != null)
traceScope.close();
}
}
use of org.apache.hadoop.hbase.util.RetryCounter in project hbase by apache.
the class RecoverableZooKeeper method exists.
/**
* exists is an idempotent operation. Retry before throwing exception
* @return A Stat instance
*/
public Stat exists(String path, Watcher watcher) throws KeeperException, InterruptedException {
TraceScope traceScope = null;
try {
traceScope = Trace.startSpan("RecoverableZookeeper.exists");
RetryCounter retryCounter = retryCounterFactory.create();
while (true) {
try {
return checkZk().exists(path, watcher);
} catch (KeeperException e) {
switch(e.code()) {
case CONNECTIONLOSS:
case OPERATIONTIMEOUT:
retryOrThrow(retryCounter, e, "exists");
break;
default:
throw e;
}
}
retryCounter.sleepUntilNextRetry();
}
} finally {
if (traceScope != null)
traceScope.close();
}
}
use of org.apache.hadoop.hbase.util.RetryCounter in project hbase by apache.
the class ServerManager method isServerReachable.
/**
* Check if a region server is reachable and has the expected start code
*/
public boolean isServerReachable(ServerName server) {
if (server == null)
throw new NullPointerException("Passed server is null");
RetryCounter retryCounter = pingRetryCounterFactory.create();
while (retryCounter.shouldRetry()) {
try {
HBaseRpcController controller = newRpcController();
AdminService.BlockingInterface admin = getRsAdmin(server);
if (admin != null) {
ServerInfo info = ProtobufUtil.getServerInfo(controller, admin);
return info != null && info.hasServerName() && server.getStartcode() == info.getServerName().getStartCode();
}
} catch (IOException ioe) {
LOG.debug("Couldn't reach " + server + ", try=" + retryCounter.getAttemptTimes() + " of " + retryCounter.getMaxAttempts(), ioe);
try {
retryCounter.sleepUntilNextRetry();
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
}
}
return false;
}
use of org.apache.hadoop.hbase.util.RetryCounter in project hbase by apache.
the class RegionReplicaFlushHandler method triggerFlushInPrimaryRegion.
void triggerFlushInPrimaryRegion(final HRegion region) throws IOException, RuntimeException {
long pause = connection.getConfiguration().getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
int maxAttempts = getRetriesCount(connection.getConfiguration());
RetryCounter counter = new RetryCounterFactory(maxAttempts, (int) pause).create();
if (LOG.isDebugEnabled()) {
LOG.debug("Attempting to do an RPC to the primary region replica " + ServerRegionReplicaUtil.getRegionInfoForDefaultReplica(region.getRegionInfo()).getEncodedName() + " of region " + region.getRegionInfo().getEncodedName() + " to trigger a flush");
}
while (!region.isClosing() && !region.isClosed() && !server.isAborted() && !server.isStopped()) {
FlushRegionCallable flushCallable = new FlushRegionCallable(connection, rpcControllerFactory, RegionReplicaUtil.getRegionInfoForDefaultReplica(region.getRegionInfo()), true);
// TODO: flushRegion() is a blocking call waiting for the flush to complete. Ideally we
// do not have to wait for the whole flush here, just initiate it.
FlushRegionResponse response = null;
try {
response = rpcRetryingCallerFactory.<FlushRegionResponse>newCaller().callWithRetries(flushCallable, this.operationTimeout);
} catch (IOException ex) {
if (ex instanceof TableNotFoundException || connection.isTableDisabled(region.getRegionInfo().getTable())) {
return;
}
throw ex;
}
if (response.getFlushed()) {
// a complete flush cycle or replay a region open event
if (LOG.isDebugEnabled()) {
LOG.debug("Successfully triggered a flush of primary region replica " + ServerRegionReplicaUtil.getRegionInfoForDefaultReplica(region.getRegionInfo()).getEncodedName() + " of region " + region.getRegionInfo().getEncodedName() + " Now waiting and blocking reads until observing a full flush cycle");
}
break;
} else {
if (response.hasWroteFlushWalMarker()) {
if (response.getWroteFlushWalMarker()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Successfully triggered an empty flush marker(memstore empty) of primary " + "region replica " + ServerRegionReplicaUtil.getRegionInfoForDefaultReplica(region.getRegionInfo()).getEncodedName() + " of region " + region.getRegionInfo().getEncodedName() + " Now waiting and " + "blocking reads until observing a flush marker");
}
break;
} else {
// closing or already flushing. Retry flush again after some sleep.
if (!counter.shouldRetry()) {
throw new IOException("Cannot cause primary to flush or drop a wal marker after " + "retries. Failing opening of this region replica " + region.getRegionInfo().getEncodedName());
}
}
} else {
// nothing to do. Are we dealing with an old server?
LOG.warn("Was not able to trigger a flush from primary region due to old server version? " + "Continuing to open the secondary region replica: " + region.getRegionInfo().getEncodedName());
region.setReadsEnabled(true);
break;
}
}
try {
counter.sleepUntilNextRetry();
} catch (InterruptedException e) {
throw new InterruptedIOException(e.getMessage());
}
}
}
Aggregations