Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
The class HBaseInterClusterReplicationEndpoint, method replicate.
/**
* Do the shipping logic
*/
@Override
public boolean replicate(ReplicateContext replicateContext) {
  CompletionService<Integer> pool = new ExecutorCompletionService<>(this.exec);
  List<Entry> entries = replicateContext.getEntries();
  String walGroupId = replicateContext.getWalGroupId();
  int sleepMultiplier = 1;
  int numReplicated = 0;
  if (!peersSelected && this.isRunning()) {
    connectToPeers();
    peersSelected = true;
  }
  int numSinks = replicationSinkMgr.getNumSinks();
  if (numSinks == 0) {
    LOG.warn("No replication sinks found, returning without replicating. "
        + "The source should retry with the same set of edits.");
    return false;
  }
  // minimum of: configured threads, number of 100-waledit batches,
  // and number of current sinks
  int n = Math.min(Math.min(this.maxThreads, entries.size() / 100 + 1), numSinks);
  List<List<Entry>> entryLists = new ArrayList<>(n);
  if (n == 1) {
    entryLists.add(entries);
  } else {
    for (int i = 0; i < n; i++) {
      entryLists.add(new ArrayList<>(entries.size() / n + 1));
    }
    // now group by region
    for (Entry e : entries) {
      entryLists.get(Math.abs(Bytes.hashCode(e.getKey().getEncodedRegionName()) % n)).add(e);
    }
  }
  while (this.isRunning() && !exec.isShutdown()) {
    if (!isPeerEnabled()) {
      if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
        sleepMultiplier++;
      }
      continue;
    }
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Replicating " + entries.size() + " entries of total size "
            + replicateContext.getSize());
      }
      int futures = 0;
      for (int i = 0; i < entryLists.size(); i++) {
        if (!entryLists.get(i).isEmpty()) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("Submitting " + entryLists.get(i).size() + " entries of total size "
                + replicateContext.getSize());
          }
          // RuntimeExceptions encountered here bubble up and are handled in ReplicationSource
          pool.submit(createReplicator(entryLists.get(i), i));
          futures++;
        }
      }
      IOException iox = null;
      for (int i = 0; i < futures; i++) {
        try {
          // wait for all futures, remove successful parts
          // (only the remaining parts will be retried)
          Future<Integer> f = pool.take();
          int index = f.get().intValue();
          int batchSize = entryLists.get(index).size();
          entryLists.set(index, Collections.<Entry>emptyList());
          // the batch is now marked as done replicating, so record its size
          numReplicated += batchSize;
        } catch (InterruptedException ie) {
          iox = new IOException(ie);
        } catch (ExecutionException ee) {
          // cause must be an IOException
          iox = (IOException) ee.getCause();
        }
      }
      if (iox != null) {
        // if we had any exceptions, try again
        throw iox;
      }
      if (numReplicated != entries.size()) {
        // Something went wrong here and we don't know what, let's just fail and retry.
        LOG.warn("The number of edits replicated is different from the number received,"
            + " failing for now.");
        return false;
      }
      // update metrics
      this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime(),
          walGroupId);
      return true;
    } catch (IOException ioe) {
      // Didn't ship anything, but must still age the last time we did
      this.metrics.refreshAgeOfLastShippedOp(walGroupId);
      if (ioe instanceof RemoteException) {
        ioe = ((RemoteException) ioe).unwrapRemoteException();
        LOG.warn("Can't replicate because of an error on the remote cluster: ", ioe);
        if (ioe instanceof TableNotFoundException) {
          if (sleepForRetries("A table is missing in the peer cluster. "
              + "Replication cannot proceed without losing data.", sleepMultiplier)) {
            sleepMultiplier++;
          }
        } else if (ioe instanceof SaslException) {
          LOG.warn("Peer encountered SaslException, rechecking all sinks: ", ioe);
          replicationSinkMgr.chooseSinks();
        }
      } else {
        if (ioe instanceof SocketTimeoutException) {
          // This exception means we waited for more than 60s and nothing happened.
          // The cluster is alive, so calling it right away, even for a test,
          // just makes things worse.
          sleepForRetries("Encountered a SocketTimeoutException. Since the call to the "
              + "remote cluster timed out, which is usually caused by a machine failure "
              + "or a massive slowdown", this.socketTimeoutMultiplier);
        } else if (ioe instanceof ConnectException) {
          LOG.warn("Peer is unavailable, rechecking all sinks: ", ioe);
          replicationSinkMgr.chooseSinks();
        } else {
          LOG.warn("Can't replicate because of a local or network error: ", ioe);
        }
      }
      if (sleepForRetries("Since we are unable to replicate", sleepMultiplier)) {
        sleepMultiplier++;
      }
    }
  }
  // in case we exited before replicating
  return false;
}
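The catch block above is where RemoteException actually matters: failures raised on the peer cluster travel back wrapped in a RemoteException, so the code unwraps it once before deciding whether to back off, re-choose sinks, or just retry. Below is a minimal, self-contained sketch of that unwrap-and-classify pattern; classifyReplicationError is a hypothetical helper, not HBase API, and the returned strings are illustrative.

import java.io.IOException;
import java.net.ConnectException;
import javax.security.sasl.SaslException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.ipc.RemoteException;

public class ReplicationErrorSketch {
  // Hypothetical helper mirroring the catch block above: unwrap first,
  // then branch on the concrete (unwrapped) exception type.
  static String classifyReplicationError(IOException ioe) {
    if (ioe instanceof RemoteException) {
      // unwrapRemoteException() re-instantiates the server-side exception
      // class named in the RemoteException, when that class is available.
      ioe = ((RemoteException) ioe).unwrapRemoteException();
      if (ioe instanceof TableNotFoundException) {
        return "table missing on peer: back off and retry, never drop edits";
      }
      if (ioe instanceof SaslException) {
        return "authentication failure on sink: re-choose sinks";
      }
      return "other remote error: " + ioe.getMessage();
    }
    if (ioe instanceof ConnectException) {
      return "sink unreachable: re-choose sinks";
    }
    return "local or network error: " + ioe.getMessage();
  }
}

The key design point is that unwrapping happens once, up front, so every subsequent instanceof test sees the real exception type rather than the wrapper.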
Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
The class MetaTableLocator, method verifyRegionLocation.
/**
 * Verify we can connect to <code>hostingServer</code> and that it is carrying
 * <code>regionName</code>.
 * @param hostingServer Interface to the server hosting <code>regionName</code>
 * @param address The server name that goes with the <code>hostingServer</code>
 * interface. Used for logging.
 * @param regionName The region name we are interested in.
 * @return True if we were able to verify the region located at the other side
 * of the interface.
 * @throws IOException
 */
// TODO: We should be able to get the ServerName from the AdminProtocol
// rather than have to pass it in. It's made awkward by the fact that the
// HRI is likely a proxy against a remote server, so getServerName needs
// to be fixed to go to a local method or to a cache before we can do this.
private boolean verifyRegionLocation(final ClusterConnection connection,
    AdminService.BlockingInterface hostingServer, final ServerName address,
    final byte[] regionName) throws IOException {
  if (hostingServer == null) {
    LOG.info("Passed hostingServer is null");
    return false;
  }
  Throwable t;
  HBaseRpcController controller = connection.getRpcControllerFactory().newController();
  try {
    // Try and get regioninfo from the hosting server.
    return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
  } catch (ConnectException e) {
    t = e;
  } catch (RetriesExhaustedException e) {
    t = e;
  } catch (RemoteException e) {
    IOException ioe = e.unwrapRemoteException();
    t = ioe;
  } catch (IOException e) {
    Throwable cause = e.getCause();
    if (cause instanceof EOFException) {
      t = cause;
    } else if (cause != null && cause.getMessage() != null
        && cause.getMessage().contains("Connection reset")) {
      t = cause;
    } else {
      t = e;
    }
  }
  LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) + " at address="
      + address + ", exception=" + t.getMessage());
  return false;
}
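The last catch arm peers one level into the cause chain because a dead or restarting server typically surfaces as an EOFException or a "Connection reset" buried inside a generic IOException. A minimal sketch of that inspection in isolation; preferConnectivityCause is a hypothetical name, not HBase API.

import java.io.EOFException;
import java.io.IOException;

public class CauseInspectionSketch {
  // Hypothetical helper: report the root connectivity failure rather than
  // the generic IOException that wraps it.
  static Throwable preferConnectivityCause(IOException e) {
    Throwable cause = e.getCause();
    if (cause instanceof EOFException) {
      return cause; // server closed the connection mid-response
    }
    if (cause != null && cause.getMessage() != null
        && cause.getMessage().contains("Connection reset")) {
      return cause; // TCP reset, typically a dead or restarting server
    }
    return e; // nothing more specific to report
  }
}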
Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
The class ProtobufUtil, method toException.
/**
 * Convert a stringified protocol buffer exception Parameter to a Java Throwable.
 *
 * @param parameter the protocol buffer Parameter to convert
 * @return the converted Throwable
 * @throws IOException if we failed to deserialize the parameter
 */
@SuppressWarnings("unchecked")
public static Throwable toException(final NameBytesPair parameter) throws IOException {
  if (parameter == null || !parameter.hasValue()) {
    return null;
  }
  String desc = parameter.getValue().toStringUtf8();
  String type = parameter.getName();
  try {
    Class<? extends Throwable> c =
        (Class<? extends Throwable>) Class.forName(type, true, CLASS_LOADER);
    Constructor<? extends Throwable> cn;
    try {
      cn = c.getDeclaredConstructor(String.class);
      return cn.newInstance(desc);
    } catch (NoSuchMethodException e) {
      // Could be a raw RemoteException. See HBASE-8987.
      cn = c.getDeclaredConstructor(String.class, String.class);
      return cn.newInstance(type, desc);
    }
  } catch (Exception e) {
    throw new IOException(e);
  }
}
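The nested try exists because most Java exception classes expose a (String message) constructor, while a raw org.apache.hadoop.ipc.RemoteException only offers (String className, String message); see HBASE-8987. A hedged round-trip sketch follows; the imports are omitted because the package of the generated NameBytesPair class differs across HBase versions (plain vs. shaded protobuf), but the builder calls are standard protobuf-generated API.

// Build the wire representation of an exception, then convert it back.
NameBytesPair pair = NameBytesPair.newBuilder()
    .setName("java.io.IOException")                          // exception class name
    .setValue(ByteString.copyFromUtf8("disk full on host"))  // illustrative message
    .build();
Throwable t = ProtobufUtil.toException(pair);
// t is an IOException created via the (String) constructor. If the name were
// org.apache.hadoop.ipc.RemoteException, which has no (String) constructor,
// the NoSuchMethodException path would build it via (String, String) instead.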
Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
The class ClientExceptionsUtil, method findException.
/**
* Look for an exception we know in the remote exception:
* - hadoop.ipc wrapped exceptions
* - nested exceptions
*
* Looks for: RegionMovedException / RegionOpeningException / RegionTooBusyException /
* ThrottlingException
* @return null if we didn't find the exception, the exception otherwise.
*/
public static Throwable findException(Object exception) {
  if (exception == null || !(exception instanceof Throwable)) {
    return null;
  }
  Throwable cur = (Throwable) exception;
  while (cur != null) {
    if (isSpecialException(cur)) {
      return cur;
    }
    if (cur instanceof RemoteException) {
      RemoteException re = (RemoteException) cur;
      cur = re.unwrapRemoteException();
      // noinspection ObjectEquality
      if (cur == re) {
        return cur;
      }
      // When we receive a RemoteException that wraps an IOException whose own cause
      // is another RemoteException, we can get into an infinite loop here; so if the
      // cause of the exception is a RemoteException, we shouldn't look further.
    } else if (cur.getCause() != null && !(cur.getCause() instanceof RemoteException)) {
      cur = cur.getCause();
    } else {
      return cur;
    }
  }
  return null;
}
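Two details above are easy to miss: unwrapRemoteException() returns the receiver itself when it cannot re-instantiate the named class (hence the cur == re check), and the cause walk deliberately stops before stepping into a nested RemoteException, which is what prevents the infinite loop described in the comment. A small usage sketch, assuming the named exception class is on the classpath:

import java.io.IOException;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.ipc.RemoteException;

public class FindExceptionSketch {
  public static void main(String[] args) {
    // RemoteException(String className, String message) is the public
    // hadoop.ipc constructor; unwrapping re-instantiates the named class.
    RemoteException re = new RemoteException(
        RegionTooBusyException.class.getName(), "region is too busy");
    Throwable found = ClientExceptionsUtil.findException(re);
    // found is a RegionTooBusyException: the loop unwrapped the wrapper
    // and isSpecialException() matched the result.
    System.out.println(found.getClass().getName());

    // A non-special exception with no further cause is returned unchanged.
    System.out.println(ClientExceptionsUtil.findException(new IOException("plain")));
  }
}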
Use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.
The class WALProcedureStore, method rollWriter.
private boolean rollWriter(final long logId) throws IOException {
  assert logId > flushLogId : "logId=" + logId + " flushLogId=" + flushLogId;
  assert lock.isHeldByCurrentThread() : "expected to be the lock owner. " + lock.isLocked();
  ProcedureWALHeader header = ProcedureWALHeader.newBuilder()
      .setVersion(ProcedureWALFormat.HEADER_VERSION)
      .setType(ProcedureWALFormat.LOG_TYPE_STREAM)
      .setMinProcId(storeTracker.getActiveMinProcId())
      .setLogId(logId)
      .build();
  FSDataOutputStream newStream = null;
  Path newLogFile = null;
  long startPos = -1;
  newLogFile = getLogFilePath(logId);
  try {
    newStream = fs.create(newLogFile, false);
  } catch (FileAlreadyExistsException e) {
    LOG.error("Log file with id=" + logId + " already exists", e);
    return false;
  } catch (RemoteException re) {
    LOG.warn("Failed to create log file with id=" + logId, re);
    return false;
  }
  try {
    ProcedureWALFormat.writeHeader(newStream, header);
    startPos = newStream.getPos();
  } catch (IOException ioe) {
    LOG.warn("Encountered exception writing header", ioe);
    newStream.close();
    return false;
  }
  closeCurrentLogStream();
  storeTracker.resetUpdates();
  stream = newStream;
  flushLogId = logId;
  totalSynced.set(0);
  long rollTs = System.currentTimeMillis();
  lastRollTs.set(rollTs);
  logs.add(new ProcedureWALFile(fs, newLogFile, header, startPos, rollTs));
  // once a second WAL is in place, build the holding cleanup tracker
  if (logs.size() == 2) {
    buildHoldingCleanupTracker();
  } else if (logs.size() > walCountWarnThreshold) {
    LOG.warn("procedure WALs count=" + logs.size() + " above the warning threshold "
        + walCountWarnThreshold + ". Check running procedures to see if something is stuck.");
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Roll new state log: " + logId);
  }
  return true;
}
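Note that fs.create() can also fail on the server side of the filesystem (on HDFS, in the NameNode), and such failures reach the client wrapped in a RemoteException; rollWriter treats that the same as a local create failure and simply declines to roll. A standalone sketch of observing such a failure is below; the path is illustrative and the configuration is whatever the environment provides.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;

public class CreateProbe {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path log = new Path("/tmp/probe-wal.log"); // illustrative path
    try (FSDataOutputStream out = fs.create(log, false /* fail if it exists */)) {
      out.writeLong(0L); // stand-in for writing the real WAL header
    } catch (RemoteException re) {
      // Server-side failure; getClassName() names the original exception
      // class as it was thrown on the server.
      System.err.println("create failed remotely: " + re.getClassName());
    }
  }
}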