Search in sources :

Example 1 with ClientExecReturn

Use of org.apache.accumulo.core.client.impl.ClientExecReturn in the Apache Accumulo project.

The method _replicate of the class AccumuloReplicaSystem.

/**
 * Replicate a single file to the peer, making up to a configured number of attempts.
 * <p>
 * Each attempt first asks the peer's replication coordinator which server should receive the data, then ships the file to that server as either an RFile or
 * a WAL, depending on the suffix of the file name. The first attempt that completes returns the Status it produced; when every attempt fails, the Status
 * that was passed in is returned unchanged so the work can be picked up again later.
 *
 * @param p
 *          Path of the file (WAL or RFile) to replicate
 * @param status
 *          Current status for the file
 * @param target
 *          Where we're replicating to
 * @param helper
 *          A helper for replication
 * @param localConf
 *          The local instance's configuration
 * @param peerContext
 *          The ClientContext to connect to the peer
 * @param accumuloUgi
 *          UserGroupInformation handed through to WAL replication; RFile replication does not receive it
 * @return The new (or unchanged) Status for the file
 */
private Status _replicate(final Path p, final Status status, final ReplicationTarget target, final ReplicaSystemHelper helper, final AccumuloConfiguration localConf, final ClientContext peerContext, final UserGroupInformation accumuloUgi) {
    try {
        // Turn tracing on for this call, sampled at the configured fraction; the outer finally turns it off again.
        final double traceFraction = localConf.getFraction(Property.REPLICATION_TRACE_PERCENT);
        Trace.on("AccumuloReplicaSystem", new ProbabilitySampler(traceFraction));

        // The remote identifier is the table id on the peer.
        final String remoteTableId = target.getRemoteIdentifier();

        // Callback that asks the peer's coordinator which server we should send data to.
        // It only captures final values, so one instance serves every attempt.
        final ClientExecReturn<String, ReplicationCoordinator.Client> servicerLookup = new ClientExecReturn<String, ReplicationCoordinator.Client>() {

            @Override
            public String execute(ReplicationCoordinator.Client client) throws Exception {
                return client.getServicerAddress(remoteTableId, peerContext.rpcCreds());
            }
        };

        // Try a bounded number of times here; if none succeeds the caller gets the
        // original status back and the work is retried at some later point.
        final int maxAttempts = localConf.getCount(Property.REPLICATION_WORK_ATTEMPTS);
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            log.debug("Attempt {}", attempt);
            log.debug("Fetching peer tserver address");

            String servicerAddress;
            final Span lookupSpan = Trace.start("Fetch peer tserver");
            try {
                servicerAddress = ReplicationClient.executeCoordinatorWithReturn(peerContext, servicerLookup);
            } catch (AccumuloException | AccumuloSecurityException e) {
                // Nothing was replicated; move straight on to the next attempt.
                log.error("Could not connect to master at {}, cannot proceed with replication. Will retry", target, e);
                continue;
            } finally {
                lookupSpan.stop();
            }

            if (servicerAddress == null) {
                // The coordinator answered, but without a usable server address.
                log.warn("Did not receive tserver from master at {}, cannot proceed with replication. Will retry.", target);
                continue;
            }

            // A server on the peer is available -- send the data its way.
            final HostAndPort peerTserver = HostAndPort.fromString(servicerAddress);
            final long timeout = localConf.getTimeInMillis(Property.REPLICATION_RPC_TIMEOUT);
            // NOTE(review): this one property is read from the 'conf' field rather than the 'localConf'
            // parameter used everywhere else in this method -- confirm that is intentional.
            final long sizeLimit = conf.getAsBytes(Property.REPLICATION_MAX_UNIT_SIZE);

            try {
                // The file name suffix decides whether this is shipped as an RFile or as a WAL.
                final boolean isRFile = p.getName().endsWith(RFILE_SUFFIX);
                final Span replicationSpan = Trace.start(isRFile ? "RFile replication" : "WAL replication");
                Status updatedStatus;
                try {
                    if (isRFile) {
                        updatedStatus = replicateRFiles(peerContext, peerTserver, target, p, status, sizeLimit, remoteTableId, peerContext.rpcCreds(), helper, timeout);
                    } else {
                        updatedStatus = replicateLogs(peerContext, peerTserver, target, p, status, sizeLimit, remoteTableId, peerContext.rpcCreds(), helper, accumuloUgi, timeout);
                    }
                } finally {
                    replicationSpan.stop();
                }

                log.debug("New status for {} after replicating to {} is {}", p, peerContext.getInstance(), ProtobufUtil.toString(updatedStatus));
                return updatedStatus;
            } catch (TTransportException | AccumuloException | AccumuloSecurityException e) {
                // Pause briefly before the next attempt.
                log.warn("Could not connect to remote server {}, will retry", servicerAddress, e);
                sleepUninterruptibly(1, TimeUnit.SECONDS);
            }
        }

        log.info("No progress was made after {} attempts to replicate {}, returning so file can be re-queued", numAttempts(maxAttempts), p);
        // No progress was made; hand back the incoming status untouched so the file is re-queued for work.
        return status;
    } finally {
        Trace.off();
    }
}

/** Identity pass-through for the attempt count reported in the final log message. */
private static int numAttempts(final int maxAttempts) {
    return maxAttempts;
}
Also used : ProbabilitySampler(org.apache.accumulo.core.trace.ProbabilitySampler) Status(org.apache.accumulo.server.replication.proto.Replication.Status) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ClientExecReturn(org.apache.accumulo.core.client.impl.ClientExecReturn) TTransportException(org.apache.thrift.transport.TTransportException) ReplicationCoordinator(org.apache.accumulo.core.replication.thrift.ReplicationCoordinator) Span(org.apache.accumulo.core.trace.Span) HostAndPort(org.apache.accumulo.core.util.HostAndPort) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Client(org.apache.accumulo.core.replication.thrift.ReplicationServicer.Client) ReplicationClient(org.apache.accumulo.core.client.impl.ReplicationClient)

Aggregations

AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 ClientExecReturn (org.apache.accumulo.core.client.impl.ClientExecReturn)1 ReplicationClient (org.apache.accumulo.core.client.impl.ReplicationClient)1 ReplicationCoordinator (org.apache.accumulo.core.replication.thrift.ReplicationCoordinator)1 Client (org.apache.accumulo.core.replication.thrift.ReplicationServicer.Client)1 ProbabilitySampler (org.apache.accumulo.core.trace.ProbabilitySampler)1 Span (org.apache.accumulo.core.trace.Span)1 HostAndPort (org.apache.accumulo.core.util.HostAndPort)1 Status (org.apache.accumulo.server.replication.proto.Replication.Status)1 TTransportException (org.apache.thrift.transport.TTransportException)1