Search in sources :

Example 6 with ReplicationQueueInfo

use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.

the class ReplicationSourceFactory method create.

/**
 * Instantiates the replication source for the given queue.
 * <p>
 * The implementation class name is read from the
 * {@code replication.replicationsource.implementation} configuration key. When the key is
 * absent, the default is {@link RecoveredReplicationSource} for a recovered queue and
 * {@link ReplicationSource} otherwise. If loading or instantiating the configured class fails
 * for any reason, a warning is logged and the same built-in default is returned instead.
 *
 * @param conf configuration that may name a custom implementation class
 * @param queueId id of the replication queue; parsed to find out whether the queue is recovered
 * @return a new replication source instance, never the result of a failed reflective call
 */
static ReplicationSourceInterface create(Configuration conf, String queueId) {
    final boolean recovered = new ReplicationQueueInfo(queueId).isQueueRecovered();
    try {
        final String fallbackImplName;
        if (recovered) {
            fallbackImplName = RecoveredReplicationSource.class.getCanonicalName();
        } else {
            fallbackImplName = ReplicationSource.class.getCanonicalName();
        }
        final String implName = conf.get("replication.replicationsource.implementation", fallbackImplName);
        final Class<?> implClass = Class.forName(implName);
        return implClass.asSubclass(ReplicationSourceInterface.class).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
        // Any reflective failure (unknown class, wrong type, no no-arg constructor, ...) ends up here.
        LOG.warn("Passed replication source implementation throws errors, " + "defaulting to ReplicationSource", e);
        if (recovered) {
            return new RecoveredReplicationSource();
        }
        return new ReplicationSource();
    }
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo)

Example 7 with ReplicationQueueInfo

use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.

the class DumpReplicationQueues method dumpQueues.

/**
 * Builds a textual dump of every replication queue of every replicator reported by the
 * queues client.
 * <p>
 * While walking the queues, region servers that are not in the tracker's list of region
 * servers are added to {@code deadRegionServers}, and queues whose peer id is not contained in
 * {@code peerIds} are added to {@code deletedQueues} (as {@code regionserver/queueId}).
 *
 * @param connection cluster connection handed to the replication peers factory
 * @param zkw ZooKeeper watcher used to build the replication helpers
 * @param peerIds ids of the peers considered to exist; queues of any other peer are flagged as deleted
 * @param hdfs forwarded unchanged to {@code formatQueue}
 * @return the concatenated output of {@code formatQueue} for every queue
 * @throws Exception an {@link IOException} wrapping any {@link KeeperException} raised while
 *     reading the queues, or whatever the replication helpers throw during setup
 */
public String dumpQueues(ClusterConnection connection, ZooKeeperWatcher zkw, Set<String> peerIds, boolean hdfs) throws Exception {
    ReplicationQueuesClientArguments replicationArgs = new ReplicationQueuesClientArguments(getConf(), new WarnOnlyAbortable(), zkw);
    StringBuilder dump = new StringBuilder();
    ReplicationQueuesClient queuesClient = ReplicationFactory.getReplicationQueuesClient(replicationArgs);
    queuesClient.init();
    ReplicationQueues replicationQueues = ReplicationFactory.getReplicationQueues(replicationArgs);
    ReplicationPeers replicationPeers = ReplicationFactory.getReplicationPeers(zkw, getConf(), queuesClient, connection);
    ReplicationTracker replicationTracker = ReplicationFactory.getReplicationTracker(zkw, replicationPeers, getConf(), new WarnOnlyAbortable(), new WarnOnlyStoppable());
    List<String> liveRegionServers = replicationTracker.getListOfRegionServers();
    try {
        // Visit each queue of each replicator and append its formatted description.
        for (String regionserver : queuesClient.getListOfReplicators()) {
            List<String> queueIds = queuesClient.getAllQueues(regionserver);
            replicationQueues.init(regionserver);
            boolean isLive = liveRegionServers.contains(regionserver);
            if (!isLive) {
                deadRegionServers.add(regionserver);
            }
            for (String queueId : queueIds) {
                ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
                List<String> wals = queuesClient.getLogsInQueue(regionserver, queueId);
                // A queue whose peer is not among the given peer ids is reported as deleted.
                boolean peerDeleted = !peerIds.contains(queueInfo.getPeerId());
                if (peerDeleted) {
                    deletedQueues.add(regionserver + "/" + queueId);
                }
                dump.append(formatQueue(regionserver, replicationQueues, queueInfo, queueId, wals, peerDeleted, hdfs));
            }
        }
    } catch (KeeperException ke) {
        throw new IOException(ke);
    }
    return dump.toString();
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo) IOException(java.io.IOException) ReplicationQueues(org.apache.hadoop.hbase.replication.ReplicationQueues) ReplicationTracker(org.apache.hadoop.hbase.replication.ReplicationTracker) ReplicationPeers(org.apache.hadoop.hbase.replication.ReplicationPeers) ReplicationQueuesClientArguments(org.apache.hadoop.hbase.replication.ReplicationQueuesClientArguments) KeeperException(org.apache.zookeeper.KeeperException) ReplicationQueuesClient(org.apache.hadoop.hbase.replication.ReplicationQueuesClient)

Example 8 with ReplicationQueueInfo

use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.

the class ReplicationSource method init.

/**
 * Initializes this replication source; must be called before the source is used.
 * <p>
 * Stores the collaborators, reads the tuning values from a private copy of the configuration
 * and derives the peer id from the queue znode name.
 *
 * @param conf configuration to use; a copy is made via {@code HBaseConfiguration.create}
 * @param fs file system to use
 * @param manager replication manager; also supplies the shared total-buffer-used counter
 * @param replicationQueues replication queues handle kept by this source
 * @param replicationPeers replication peers handle kept by this source
 * @param stopper the {@code Stoppable} kept by this source
 * @param peerClusterZnode the name of our znode
 * @param clusterId unique UUID for the cluster
 * @param replicationEndpoint the replication endpoint implementation
 * @param metrics metrics for replication source
 * @throws IOException declared by the overridden method
 */
@Override
public void init(final Configuration conf, final FileSystem fs, final ReplicationSourceManager manager, final ReplicationQueues replicationQueues, final ReplicationPeers replicationPeers, final Stoppable stopper, final String peerClusterZnode, final UUID clusterId, ReplicationEndpoint replicationEndpoint, final MetricsSource metrics) throws IOException {
    this.stopper = stopper;
    this.conf = HBaseConfiguration.create(conf);
    decorateConf();

    // Defaults: 1 second between retries, 5 minutes @ 1 sec per retry.
    this.sleepForRetries = this.conf.getLong("replication.source.sleepforretries", 1000);
    this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 300);
    this.queueSizePerGroup = this.conf.getInt("hbase.regionserver.maxlogs", 32);

    this.replicationQueues = replicationQueues;
    this.replicationPeers = replicationPeers;
    this.manager = manager;
    this.fs = fs;
    this.metrics = metrics;
    this.clusterId = clusterId;

    // ReplicationQueueInfo parses the peerId out of the znode for us
    this.peerClusterZnode = peerClusterZnode;
    this.replicationQueueInfo = new ReplicationQueueInfo(peerClusterZnode);
    this.peerId = this.replicationQueueInfo.getPeerId();
    // The already-extracted peer id is parsed once more to obtain the actual peer id.
    final ReplicationQueueInfo peerQueueInfo = new ReplicationQueueInfo(this.peerId);
    this.actualPeerId = peerQueueInfo.getPeerId();

    this.logQueueWarnThreshold = this.conf.getInt("replication.source.log.queue.warn", 2);
    this.replicationEndpoint = replicationEndpoint;

    this.defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
    this.currentBandwidth = getCurrentBandwidth();
    // The throttler is configured with one tenth of the current bandwidth.
    this.throttler = new ReplicationThrottler((double) this.currentBandwidth / 10.0);
    this.totalBufferUsed = manager.getTotalBufferUsed();

    LOG.info("peerClusterZnode=" + peerClusterZnode
        + ", ReplicationSource : " + this.peerId
        + ", currentBandwidth=" + this.currentBandwidth);
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo)

Example 9 with ReplicationQueueInfo

use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.

the class ClaimReplicationQueuesProcedure method execute.

/**
 * Schedules the claiming of the replication queues stored for {@code crashedServer}.
 * <p>
 * Queues that belong to the legacy region replica replication peer are not claimed: they are
 * dropped from the work list and removed from the queue storage. If no queue remains, the
 * replicator entry is removed (when empty) and the procedure is done. Otherwise one remote
 * procedure is created per queue, each targeting a different randomly chosen online server,
 * for at most {@code min(#queues, #onlineServers)} queues.
 * NOTE(review): queues beyond that count are not scheduled by this call; presumably they are
 * handled on a later execution of this procedure - confirm.
 *
 * @param env master procedure environment giving access to queue storage and the server manager
 * @return the remote claim procedures to run as children, or {@code null} when there is nothing
 *     left to claim
 * @throws ProcedureSuspendedException after a {@link ReplicationException} (including the
 *     "no region server available" case); the procedure is put into
 *     {@code WAITING_TIMEOUT} with a back-off before being retried
 */
@Override
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
    ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage();
    try {
        List<String> queues = storage.getAllQueues(crashedServer);
        // Skip and delete the queues of the legacy region replica replication peer instead of
        // claiming them; such a queue can exist because the peer may still be used by region
        // servers which have not been upgraded yet.
        for (Iterator<String> iter = queues.iterator(); iter.hasNext(); ) {
            ReplicationQueueInfo queue = new ReplicationQueueInfo(iter.next());
            if (queue.getPeerId().equals(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_PEER)) {
                LOG.info("Found replication queue {} for legacy region replication peer, " + "skipping claiming and removing...", queue.getQueueId());
                iter.remove();
                storage.removeQueue(crashedServer, queue.getQueueId());
            }
        }
        if (queues.isEmpty()) {
            LOG.debug("Finish claiming replication queues for {}", crashedServer);
            storage.removeReplicatorIfQueueIsEmpty(crashedServer);
            // we are done
            return null;
        }
        LOG.debug("There are {} replication queues need to be claimed for {}", queues.size(), crashedServer);
        List<ServerName> targetServers = env.getMasterServices().getServerManager().getOnlineServersList();
        if (targetServers.isEmpty()) {
            // Caught below, so this results in a suspended retry rather than a failure.
            throw new ReplicationException("no region server available");
        }
        // Shuffle so that the claimed queues are spread over random online servers.
        Collections.shuffle(targetServers);
        ClaimReplicationQueueRemoteProcedure[] procs = new ClaimReplicationQueueRemoteProcedure[Math.min(queues.size(), targetServers.size())];
        for (int i = 0; i < procs.length; i++) {
            procs[i] = new ClaimReplicationQueueRemoteProcedure(crashedServer, queues.get(i), targetServers.get(i));
        }
        return procs;
    } catch (ReplicationException e) {
        // The retry counter is created on first failure and reused for later attempts.
        if (retryCounter == null) {
            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
        }
        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
        // Exactly one placeholder per non-throwable argument; the trailing exception is logged
        // with its stack trace. Surplus placeholders would be printed literally as "{}".
        LOG.warn("Failed to claim replication queues for {}, suspend {}secs", crashedServer, backoff / 1000, e);
        setTimeout(Math.toIntExact(backoff));
        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
        skipPersistence();
        throw new ProcedureSuspendedException();
    }
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo) ServerName(org.apache.hadoop.hbase.ServerName) ReplicationException(org.apache.hadoop.hbase.replication.ReplicationException) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException)

Example 10 with ReplicationQueueInfo

use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.

the class ReplicationSource method init.

/**
 * Initializes this replication source; must be called before the source is used.
 * <p>
 * Stores the collaborators and reads the tuning values from a private copy of the
 * configuration.
 *
 * @param conf configuration to use; a copy is made via {@code HBaseConfiguration.create}
 * @param fs file system to use
 * @param manager replication manager; also supplies the shared total-buffer-used counter
 * @param queueStorage replication queue storage kept by this source
 * @param replicationPeer the peer this source replicates to; its id is logged
 * @param server the server for this region server
 * @param queueId the id of our replication queue
 * @param clusterId unique UUID for the cluster
 * @param walFileLengthProvider WAL file length provider kept by this source
 * @param metrics metrics for replication source
 * @throws IOException declared by the overridden method
 */
@Override
public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager, ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
    this.server = server;
    this.conf = HBaseConfiguration.create(conf);
    this.waitOnEndpointSeconds = this.conf.getInt(WAIT_ON_ENDPOINT_SECONDS, DEFAULT_WAIT_ON_ENDPOINT_SECONDS);
    decorateConf();

    // Defaults: 1 second between retries, 5 minutes @ 1 sec per retry.
    this.sleepForRetries = this.conf.getLong("replication.source.sleepforretries", 1000);
    this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 300);
    this.queueSizePerGroup = this.conf.getInt("hbase.regionserver.maxlogs", 32);
    // Note: the log queue is built from the caller's configuration, not from the private copy.
    this.logQueue = new ReplicationSourceLogQueue(conf, metrics, this);

    this.queueStorage = queueStorage;
    this.replicationPeer = replicationPeer;
    this.manager = manager;
    this.fs = fs;
    this.metrics = metrics;
    this.clusterId = clusterId;

    this.queueId = queueId;
    this.replicationQueueInfo = new ReplicationQueueInfo(queueId);

    // A defaultBandwidth of '0' means no bandwidth; i.e. no throttling.
    this.defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
    this.currentBandwidth = getCurrentBandwidth();
    // The throttler is configured with one tenth of the current bandwidth.
    this.throttler = new ReplicationThrottler((double) this.currentBandwidth / 10.0);
    this.totalBufferUsed = manager.getTotalBufferUsed();
    this.walFileLengthProvider = walFileLengthProvider;
    this.abortOnError = this.conf.getBoolean("replication.source.regionserver.abort", true);

    LOG.info("queueId={}, ReplicationSource: {}, currentBandwidth={}",
        queueId, replicationPeer.getId(), this.currentBandwidth);
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo)

Aggregations

ReplicationQueueInfo (org.apache.hadoop.hbase.replication.ReplicationQueueInfo)11 ServerName (org.apache.hadoop.hbase.ServerName)5 ReplicationQueueStorage (org.apache.hadoop.hbase.replication.ReplicationQueueStorage)5 HashMap (java.util.HashMap)4 ReplicationException (org.apache.hadoop.hbase.replication.ReplicationException)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 List (java.util.List)3 Map (java.util.Map)3 Set (java.util.Set)3 Configuration (org.apache.hadoop.conf.Configuration)3 ReplicationPeerStorage (org.apache.hadoop.hbase.replication.ReplicationPeerStorage)2 ReplicationPeers (org.apache.hadoop.hbase.replication.ReplicationPeers)2 ReplicationQueuesClient (org.apache.hadoop.hbase.replication.ReplicationQueuesClient)2 ReplicationQueuesClientArguments (org.apache.hadoop.hbase.replication.ReplicationQueuesClientArguments)2 ReplicationStorageFactory (org.apache.hadoop.hbase.replication.ReplicationStorageFactory)2 HbckErrorReporter (org.apache.hadoop.hbase.util.HbckErrorReporter)2 ZKWatcher (org.apache.hadoop.hbase.zookeeper.ZKWatcher)2 InterfaceAudience (org.apache.yetus.audience.InterfaceAudience)2