Search in sources :

Example 36 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class HBaseInterClusterReplicationEndpoint method replicate.

/**
   * Do the shipping logic.
   * <p>
   * The entries of the context are split into at most {@code n} batches, grouped by a hash of
   * the encoded region name. One replicator is submitted per non-empty batch and all of them
   * are awaited. A batch that completed is replaced by an empty list, so only the remaining
   * batches are resubmitted on the next pass. The method keeps retrying (with back-off via
   * {@code sleepForRetries}) while this endpoint is running and the executor is not shut down.
   *
   * @param replicateContext supplies the entries to ship, their total size and the WAL group id
   * @return {@code true} once every entry has been replicated; {@code false} if there are no
   *         sinks, if the replicated count does not match the number of entries, or if the
   *         retry loop exited before replication finished
   */
@Override
public boolean replicate(ReplicateContext replicateContext) {
    CompletionService<Integer> pool = new ExecutorCompletionService<>(this.exec);
    List<Entry> entries = replicateContext.getEntries();
    String walGroupId = replicateContext.getWalGroupId();
    int sleepMultiplier = 1;
    // Accumulated across retries: finished batches are emptied, so they are never counted twice.
    int numReplicated = 0;
    if (!peersSelected && this.isRunning()) {
        connectToPeers();
        peersSelected = true;
    }
    int numSinks = replicationSinkMgr.getNumSinks();
    if (numSinks == 0) {
        LOG.warn("No replication sinks found, returning without replicating. The source should retry" + " with the same set of edits.");
        return false;
    }
    // minimum of: configured threads, number of 100-waledit batches,
    //  and number of current sinks
    int n = Math.min(Math.min(this.maxThreads, entries.size() / 100 + 1), numSinks);
    List<List<Entry>> entryLists = new ArrayList<>(n);
    if (n == 1) {
        entryLists.add(entries);
    } else {
        for (int i = 0; i < n; i++) {
            entryLists.add(new ArrayList<>(entries.size() / n + 1));
        }
        // now group by region
        for (Entry e : entries) {
            // abs() is applied after the modulo, so the index always lies in [0, n)
            entryLists.get(Math.abs(Bytes.hashCode(e.getKey().getEncodedRegionName()) % n)).add(e);
        }
    }
    while (this.isRunning() && !exec.isShutdown()) {
        if (!isPeerEnabled()) {
            if (sleepForRetries("Replication is disabled", sleepMultiplier)) {
                sleepMultiplier++;
            }
            continue;
        }
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Replicating " + entries.size() + " entries of total size " + replicateContext.getSize());
            }
            int futures = 0;
            for (int i = 0; i < entryLists.size(); i++) {
                if (!entryLists.get(i).isEmpty()) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Submitting " + entryLists.get(i).size() + " entries of total size " + replicateContext.getSize());
                    }
                    // RuntimeExceptions encountered here bubble up and are handled in ReplicationSource
                    pool.submit(createReplicator(entryLists.get(i), i));
                    futures++;
                }
            }
            IOException iox = null;
            for (int i = 0; i < futures; i++) {
                try {
                    // wait for all futures, remove successful parts
                    // (only the remaining parts will be retried)
                    Future<Integer> f = pool.take();
                    int index = f.get().intValue();
                    int batchSize = entryLists.get(index).size();
                    entryLists.set(index, Collections.<Entry>emptyList());
                    // Now, we have marked the batch as done replicating, record its size
                    numReplicated += batchSize;
                } catch (InterruptedException ie) {
                    iox = new IOException(ie);
                } catch (ExecutionException ee) {
                    // The cause is normally an IOException, but a replicator may also fail with
                    // an unchecked exception (or carry no cause at all). Wrap anything else so
                    // it goes through the regular retry path instead of a ClassCastException.
                    Throwable cause = ee.getCause();
                    iox = cause instanceof IOException ? (IOException) cause : new IOException(cause);
                }
            }
            if (iox != null) {
                // if we had any exceptions, try again
                throw iox;
            }
            if (numReplicated != entries.size()) {
                // Something went wrong here and we don't know what, let's just fail and retry.
                LOG.warn("The number of edits replicated is different from the number received," + " failing for now.");
                return false;
            }
            // update metrics
            this.metrics.setAgeOfLastShippedOp(entries.get(entries.size() - 1).getKey().getWriteTime(), walGroupId);
            return true;
        } catch (IOException ioe) {
            // Didn't ship anything, but must still age the last time we did
            this.metrics.refreshAgeOfLastShippedOp(walGroupId);
            if (ioe instanceof RemoteException) {
                // Failure reported by the peer cluster: unwrap to inspect the real exception type.
                ioe = ((RemoteException) ioe).unwrapRemoteException();
                LOG.warn("Can't replicate because of an error on the remote cluster: ", ioe);
                if (ioe instanceof TableNotFoundException) {
                    if (sleepForRetries("A table is missing in the peer cluster. " + "Replication cannot proceed without losing data.", sleepMultiplier)) {
                        sleepMultiplier++;
                    }
                } else if (ioe instanceof SaslException) {
                    LOG.warn("Peer encountered SaslException, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                }
            } else {
                if (ioe instanceof SocketTimeoutException) {
                    // This exception means we waited for more than 60s and nothing
                    // happened, the cluster is alive and calling it right away
                    // even for a test just makes things worse.
                    sleepForRetries("Encountered a SocketTimeoutException. Since the " + "call to the remote cluster timed out, which is usually " + "caused by a machine failure or a massive slowdown", this.socketTimeoutMultiplier);
                } else if (ioe instanceof ConnectException) {
                    LOG.warn("Peer is unavailable, rechecking all sinks: ", ioe);
                    replicationSinkMgr.chooseSinks();
                } else {
                    LOG.warn("Can't replicate because of a local or network error: ", ioe);
                }
            }
            // Back off before the next pass of the retry loop, whatever the failure was.
            if (sleepForRetries("Since we are unable to replicate", sleepMultiplier)) {
                sleepMultiplier++;
            }
        }
    }
    // in case we exited before replicating
    return false;
}
Also used : ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) IOException(java.io.IOException) SaslException(javax.security.sasl.SaslException) HBaseReplicationEndpoint(org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) Entry(org.apache.hadoop.hbase.wal.WAL.Entry) SocketTimeoutException(java.net.SocketTimeoutException) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) RemoteException(org.apache.hadoop.ipc.RemoteException) ConnectException(java.net.ConnectException)

Example 37 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class MetaTableLocator method verifyRegionLocation.

/**
   * Checks that <code>hostingServer</code> can be reached and that it reports
   * region info for <code>regionName</code>.
   * @param connection Used to create the RPC controller for the lookup.
   * @param hostingServer Interface to the server hosting <code>regionName</code>;
   * a null value is logged and treated as a failed verification.
   * @param address The servername that goes with the <code>hostingServer</code>
   * Interface.  Used for logging only.
   * @param regionName The regionname we are interested in.
   * @return True if the hosting server returned region info for the region;
   * false if the server is null or the lookup failed with an IOException.
   * @throws IOException declared on the signature; IOExceptions raised by the
   * region-info lookup itself are caught and logged here.
   */
// TODO: We should be able to get the ServerName from the AdminProtocol
// rather than have to pass it in.  Its made awkward by the fact that the
// HRI is likely a proxy against remote server so the getServerName needs
// to be fixed to go to a local method or to a cache before we can do this.
private boolean verifyRegionLocation(final ClusterConnection connection, AdminService.BlockingInterface hostingServer, final ServerName address, final byte[] regionName) throws IOException {
    if (hostingServer == null) {
        LOG.info("Passed hostingServer is null");
        return false;
    }
    HBaseRpcController controller = connection.getRpcControllerFactory().newController();
    // The throwable whose message is reported when verification fails.
    Throwable failure;
    try {
        // Try and get regioninfo from the hosting server.
        return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
    } catch (ConnectException | RetriesExhaustedException e) {
        failure = e;
    } catch (RemoteException e) {
        // Report the exception the remote side actually raised.
        failure = e.unwrapRemoteException();
    } catch (IOException e) {
        // Prefer the underlying cause when it is an EOF or a connection reset.
        Throwable cause = e.getCause();
        boolean endOfFile = cause instanceof EOFException;
        boolean connectionReset = cause != null && cause.getMessage() != null && cause.getMessage().contains("Connection reset");
        if (endOfFile || connectionReset) {
            failure = cause;
        } else {
            failure = e;
        }
    }
    LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) + " at address=" + address + ", exception=" + failure.getMessage());
    return false;
}
Also used : HBaseRpcController(org.apache.hadoop.hbase.ipc.HBaseRpcController) RetriesExhaustedException(org.apache.hadoop.hbase.client.RetriesExhaustedException) EOFException(java.io.EOFException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) ConnectException(java.net.ConnectException)

Example 38 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class ProtobufUtil method toException.

/**
   * Rebuilds a Java exception from its protocol buffer form, where the pair's name is the
   * exception class name and its value is the UTF-8 encoded message.
   *
   * @param parameter the protocol buffer Parameter to convert
   * @return the converted Exception, or null when the parameter is null or carries no value
   * @throws IOException if the class cannot be loaded or instantiated
   */
@SuppressWarnings("unchecked")
public static Throwable toException(final NameBytesPair parameter) throws IOException {
    if (parameter == null || !parameter.hasValue()) {
        return null;
    }
    String desc = parameter.getValue().toStringUtf8();
    String type = parameter.getName();
    try {
        Class<? extends Throwable> exceptionClass = (Class<? extends Throwable>) Class.forName(type, true, CLASS_LOADER);
        Constructor<? extends Throwable> ctor;
        Object[] ctorArgs;
        try {
            // Usual case: a constructor taking only the message.
            ctor = exceptionClass.getDeclaredConstructor(String.class);
            ctorArgs = new Object[] { desc };
        } catch (NoSuchMethodException e) {
            // Could be a raw RemoteException. See HBASE-8987.
            ctor = exceptionClass.getDeclaredConstructor(String.class, String.class);
            ctorArgs = new Object[] { type, desc };
        }
        return ctor.newInstance(ctorArgs);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : ByteString(com.google.protobuf.ByteString) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ServiceException(com.google.protobuf.ServiceException) DeserializationException(org.apache.hadoop.hbase.exceptions.DeserializationException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 39 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class ClientExceptionsUtil method findException.

/**
   * Walks an exception chain looking for one accepted by {@code isSpecialException}, following:
   * - hadoop.ipc wrapped exceptions (unwrapped via unwrapRemoteException)
   * - nested exceptions (via getCause)
   *
   * Looks for: RegionMovedException / RegionOpeningException / RegionTooBusyException /
   *            ThrottlingException
   * @param exception the object to inspect; anything that is not a Throwable yields null
   * @return the special exception if one is found; otherwise the last throwable reached in
   *         the chain, or null when the input is null or not a Throwable.
   */
public static Throwable findException(Object exception) {
    if (!(exception instanceof Throwable)) {
        // Covers null as well: instanceof is false for null.
        return null;
    }
    Throwable current = (Throwable) exception;
    while (current != null) {
        if (isSpecialException(current)) {
            return current;
        }
        if (current instanceof RemoteException) {
            RemoteException remote = (RemoteException) current;
            Throwable unwrapped = remote.unwrapRemoteException();
            // noinspection ObjectEquality
            if (unwrapped == remote) {
                // Unwrapping made no progress; stop here.
                return unwrapped;
            }
            current = unwrapped;
            continue;
        }
        // When we receive RemoteException which wraps IOException which has a cause as
        // RemoteException we can get into infinite loop here; so if the cause of the exception
        // is RemoteException, we shouldn't look further.
        Throwable cause = current.getCause();
        if (cause == null || cause instanceof RemoteException) {
            return current;
        }
        current = cause;
    }
    return null;
}
Also used : RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 40 with RemoteException

use of org.apache.hadoop.ipc.RemoteException in project hbase by apache.

the class WALProcedureStore method rollWriter.

/**
 * Creates the WAL file for {@code logId}, writes its header and makes it the current stream.
 * The assertions expect the caller to hold {@code lock} and {@code logId} to be greater than
 * the current {@code flushLogId}.
 *
 * @param logId id of the log file to create
 * @return true when the new log was created and installed; false when the file could not be
 *         created (already exists, or a RemoteException was raised) or the header write failed
 * @throws IOException for failures that are not handled here, e.g. other errors from the file
 *         creation or from closing the new stream after a failed header write
 */
private boolean rollWriter(final long logId) throws IOException {
    assert logId > flushLogId : "logId=" + logId + " flushLogId=" + flushLogId;
    assert lock.isHeldByCurrentThread() : "expected to be the lock owner. " + lock.isLocked();
    ProcedureWALHeader header = ProcedureWALHeader.newBuilder()
        .setVersion(ProcedureWALFormat.HEADER_VERSION)
        .setType(ProcedureWALFormat.LOG_TYPE_STREAM)
        .setMinProcId(storeTracker.getActiveMinProcId())
        .setLogId(logId)
        .build();
    Path newLogFile = getLogFilePath(logId);

    // Step 1: create the file; never overwrite an existing one.
    FSDataOutputStream newStream;
    try {
        newStream = fs.create(newLogFile, false);
    } catch (FileAlreadyExistsException e) {
        LOG.error("Log file with id=" + logId + " already exists", e);
        return false;
    } catch (RemoteException re) {
        LOG.warn("failed to create log file with id=" + logId, re);
        return false;
    }

    // Step 2: write the header and remember the position right after it.
    long startPos;
    try {
        ProcedureWALFormat.writeHeader(newStream, header);
        startPos = newStream.getPos();
    } catch (IOException ioe) {
        LOG.warn("Encountered exception writing header", ioe);
        newStream.close();
        return false;
    }

    // Step 3: switch over to the new stream and reset the per-log bookkeeping.
    closeCurrentLogStream();
    storeTracker.resetUpdates();
    stream = newStream;
    flushLogId = logId;
    totalSynced.set(0);
    long rollTs = System.currentTimeMillis();
    lastRollTs.set(rollTs);
    logs.add(new ProcedureWALFile(fs, newLogFile, header, startPos, rollTs));

    // if it's the first next WAL being added, build the holding cleanup tracker
    int walCount = logs.size();
    if (walCount == 2) {
        buildHoldingCleanupTracker();
    } else if (walCount > walCountWarnThreshold) {
        LOG.warn("procedure WALs count=" + walCount + " above the warning threshold " + walCountWarnThreshold + ". check running procedures to see if something is stuck.");
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Roll new state log: " + logId);
    }
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) ProcedureWALHeader(org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureWALHeader) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException)

Aggregations

RemoteException (org.apache.hadoop.ipc.RemoteException)99 IOException (java.io.IOException)53 Test (org.junit.Test)39 Path (org.apache.hadoop.fs.Path)36 Configuration (org.apache.hadoop.conf.Configuration)20 FileNotFoundException (java.io.FileNotFoundException)19 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)13 FileSystem (org.apache.hadoop.fs.FileSystem)12 InterruptedIOException (java.io.InterruptedIOException)10 AccessControlException (org.apache.hadoop.security.AccessControlException)10 ServerName (org.apache.hadoop.hbase.ServerName)9 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)8 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)8 FileAlreadyExistsException (org.apache.hadoop.fs.FileAlreadyExistsException)7 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)7 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)7 EOFException (java.io.EOFException)6 ArrayList (java.util.ArrayList)6 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)6 HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)6