Search in sources :

Example 1 with RegionReplicationSink

Use of org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink in the Apache HBase project.

The method initializeRegionReplicationSink of the class HRegion.

/**
 * Sets up {@code regionReplicationSink} for this region.
 *
 * <p>A sink is created only when all of the following hold, checked in this order: the table
 * descriptor reports more than one region replica, this region is the default replica, region
 * replica replication is enabled for the table, and region server services are available.
 * Otherwise the field is set to an empty {@link Optional}.
 *
 * @param reporter not used by this method
 * @param status task whose status text is updated just before the sink is constructed
 */
private void initializeRegionReplicationSink(CancelableProgressable reporter, MonitoredTask status) {
    final RegionServerServices services = getRegionServerServices();
    final TableDescriptor tableDescriptor = getTableDescriptor();
    final int replicaCount = tableDescriptor.getRegionReplication();
    final RegionInfo info = getRegionInfo();

    // Same checks as a chain of "opt-out" conditions, expressed positively; the evaluation order
    // (and therefore the short-circuiting) is unchanged.
    final boolean sinkNeeded = replicaCount > 1
        && RegionReplicaUtil.isDefaultReplica(info)
        && ServerRegionReplicaUtil.isRegionReplicaReplicationEnabled(conf, info.getTable())
        && services != null;
    if (!sinkNeeded) {
        regionReplicationSink = Optional.empty();
        return;
    }

    status.setStatus("Initializaing region replication sink");
    // The callback handed to the sink requests a flush of every column family of this region.
    RegionReplicationSink sink = new RegionReplicationSink(
        conf,
        info,
        tableDescriptor,
        services.getRegionReplicationBufferManager(),
        () -> services.getFlushRequester().requestFlush(this, new ArrayList<>(tableDescriptor.getColumnFamilyNames()), FlushLifeCycleTracker.DUMMY),
        services.getAsyncClusterConnection());
    regionReplicationSink = Optional.of(sink);
}
Also used : ArrayList(java.util.ArrayList) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) RegionReplicationSink(org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink)

Example 2 with RegionReplicationSink

Use of org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink in the Apache HBase project.

The method doClose of the class HRegion.

/**
 * Closes this region and returns the store files of every store, keyed by column family name.
 *
 * <p>Steps, in order: run coprocessor pre-close hooks; disable writes in {@code writestate} and
 * wait for running flushes/compactions; optionally pre-flush; stop the region replication sink
 * (if present); set the {@code closing} flag; acquire the region write lock (with or without a
 * time limit); flush until the memstore data size reaches zero; close all stores in parallel;
 * write the region close marker to the WAL; set the {@code closed} flag; run coprocessor
 * post-close hooks; close region metrics. The write lock is released in the {@code finally}
 * block.
 *
 * @param abort when {@code true}, the pre-flush, the final flush loop and the WAL close marker
 *        are all skipped
 * @param status task used to publish progress messages for each step
 * @return a map from column family name to the files returned by closing that family's store,
 *         or {@code null} if the region was already closed (checked both before and after
 *         taking the write lock)
 * @throws IOException if the close lock cannot be acquired in time, if waiting for it is
 *         interrupted (as {@link InterruptedIOException}), if a flush fails, or if closing a
 *         store fails
 * @throws DroppedSnapshotException (an {@code IOException} here, since it is rethrown from the
 *         {@code catch (IOException)} block) if more than 5 flushes fail to reduce the memstore
 *         data size
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "UL_UNRELEASED_LOCK_EXCEPTION_PATH", justification = "I think FindBugs is confused")
private Map<byte[], List<HStoreFile>> doClose(boolean abort, MonitoredTask status) throws IOException {
    if (isClosed()) {
        LOG.warn("Region " + this + " already closed");
        return null;
    }
    if (coprocessorHost != null) {
        status.setStatus("Running coprocessor pre-close hooks");
        this.coprocessorHost.preClose(abort);
    }
    status.setStatus("Disabling compacts and flushes for region");
    boolean canFlush = true;
    synchronized (writestate) {
        // Disable compacting and flushing by background threads for this
        // region.
        // A read-only region is never flushed by this method.
        canFlush = !writestate.readOnly;
        writestate.writesEnabled = false;
        LOG.debug("Closing {}, disabling compactions & flushes", this.getRegionInfo().getEncodedName());
        waitForFlushesAndCompactions();
    }
    // Unless we are aborting, is it worth doing a pre-flush before we put up
    // the close flag?
    if (!abort && worthPreFlushing() && canFlush) {
        status.setStatus("Pre-flushing region before close");
        LOG.info("Running close preflush of {}", this.getRegionInfo().getEncodedName());
        try {
            internalFlushcache(status);
        } catch (IOException ioe) {
            // Failed to flush the region. Keep going.
            // The failure is only recorded in the task status; the flush loop further down
            // runs again once the write lock is held.
            status.setStatus("Failed pre-flush " + this + "; " + ioe.getMessage());
        }
    }
    if (regionReplicationSink.isPresent()) {
        // stop replicating to secondary replicas
        // the open event marker can make secondary replicas refresh store files and catch up
        // everything, so here we just give up replicating later edits, to speed up the reopen process
        RegionReplicationSink sink = regionReplicationSink.get();
        sink.stop();
        try {
            // Same object as 'sink' above; block until the sink reports it has stopped.
            regionReplicationSink.get().waitUntilStopped();
        } catch (InterruptedException e) {
            throw throwOnInterrupt(e);
        }
    }
    // Set the closing flag
    // From this point new arrivals at the region lock will get NSRE.
    this.closing.set(true);
    LOG.info("Closing region {}", this);
    // Acquire the close lock
    // The configuration parameter CLOSE_WAIT_ABORT is overloaded to enable both
    // the new regionserver abort condition and interrupts for running requests.
    // If CLOSE_WAIT_ABORT is not enabled there is no change from earlier behavior,
    // we will not attempt to interrupt threads servicing requests nor crash out
    // the regionserver if something remains stubborn.
    final boolean canAbort = conf.getBoolean(CLOSE_WAIT_ABORT, DEFAULT_CLOSE_WAIT_ABORT);
    boolean useTimedWait = false;
    if (timeoutForWriteLock != null && timeoutForWriteLock != Long.MAX_VALUE) {
        // convert legacy use of timeoutForWriteLock in seconds to new use in millis
        // NOTE(review): the converted value is stored back into timeoutForWriteLock itself; if
        // doClose can run again on the same instance (e.g. after a failed close) the value
        // would be converted a second time - confirm against callers.
        timeoutForWriteLock = TimeUnit.SECONDS.toMillis(timeoutForWriteLock);
        useTimedWait = true;
    } else if (canAbort) {
        // No explicit timeout was given, but aborting is allowed: use the configured wait time.
        timeoutForWriteLock = conf.getLong(CLOSE_WAIT_TIME, DEFAULT_CLOSE_WAIT_TIME);
        useTimedWait = true;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug((useTimedWait ? "Time limited wait" : "Waiting without time limit") + " for close lock on " + this);
    }
    final long closeWaitInterval = conf.getLong(CLOSE_WAIT_INTERVAL, DEFAULT_CLOSE_WAIT_INTERVAL);
    long elapsedWaitTime = 0;
    if (useTimedWait) {
        // Sanity check configuration
        long remainingWaitTime = timeoutForWriteLock;
        if (remainingWaitTime < closeWaitInterval) {
            LOG.warn("Time limit for close wait of " + timeoutForWriteLock + " ms is less than the configured lock acquisition wait interval " + closeWaitInterval + " ms, using wait interval as time limit");
            remainingWaitTime = closeWaitInterval;
        }
        boolean acquired = false;
        // Try the write lock in slices of at most closeWaitInterval ms until it is acquired or
        // the remaining time budget is used up.
        do {
            long start = EnvironmentEdgeManager.currentTime();
            try {
                acquired = lock.writeLock().tryLock(Math.min(remainingWaitTime, closeWaitInterval), TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                // Interrupted waiting for close lock. More likely the server is shutting down, not
                // normal operation, so aborting upon interrupt while waiting on this lock would not
                // provide much value. Throw an IOE (as IIOE) like we would in the case where we
                // fail to acquire the lock.
                String msg = "Interrupted while waiting for close lock on " + this;
                LOG.warn(msg, e);
                throw (InterruptedIOException) new InterruptedIOException(msg).initCause(e);
            }
            long elapsed = EnvironmentEdgeManager.currentTime() - start;
            elapsedWaitTime += elapsed;
            remainingWaitTime -= elapsed;
            if (canAbort && !acquired && remainingWaitTime > 0) {
                // Before we loop to wait again, interrupt the region operations that are still
                // in progress. NOTE(review): presumably this makes them give up the region lock
                // via endRegionOperation - confirm against interruptRegionOperations.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Interrupting region operations after waiting for close lock for " + elapsedWaitTime + " ms on " + this + ", " + remainingWaitTime + " ms remaining");
                }
                interruptRegionOperations();
            }
        } while (!acquired && remainingWaitTime > 0);
        // If we failed to acquire the lock, request a server abort when that is allowed; in
        // either case throw an IOE to let the caller know we could not proceed with the close.
        if (!acquired) {
            String msg = "Failed to acquire close lock on " + this + " after waiting " + elapsedWaitTime + " ms";
            LOG.error(msg);
            if (canAbort) {
                // If we failed to acquire the write lock, abort the server
                rsServices.abort(msg, null);
            }
            throw new IOException(msg);
        }
    } else {
        // Untimed path: block until the write lock is available, recording how long it took.
        long start = EnvironmentEdgeManager.currentTime();
        lock.writeLock().lock();
        elapsedWaitTime = EnvironmentEdgeManager.currentTime() - start;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Acquired close lock on " + this + " after waiting " + elapsedWaitTime + " ms");
    }
    status.setStatus("Disabling writes for close");
    // The write lock is held from here on; the finally block at the end releases it on every
    // exit path of this try, including the early 'return null'.
    try {
        if (this.isClosed()) {
            status.abort("Already got closed by another process");
            // SplitTransaction handles the null
            return null;
        }
        LOG.debug("Updates disabled for region " + this);
        // Don't flush the cache if we are aborting
        if (!abort && canFlush) {
            int failedfFlushCount = 0;
            int flushCount = 0;
            long tmp = 0;
            long remainingSize = this.memStoreSizing.getDataSize();
            // Keep flushing until the memstore reports no remaining data.
            while (remainingSize > 0) {
                try {
                    internalFlushcache(status);
                    if (flushCount > 0) {
                        LOG.info("Running extra flush, " + flushCount + " (carrying snapshot?) " + this);
                    }
                    flushCount++;
                    tmp = this.memStoreSizing.getDataSize();
                    // A flush that did not shrink the memstore data size counts as failed.
                    if (tmp >= remainingSize) {
                        failedfFlushCount++;
                    }
                    remainingSize = tmp;
                    if (failedfFlushCount > 5) {
                        // More than 5 flushes failed to reduce the memstore; give up with an
                        // exception (handled by the catch below, which re-enables writes)
                        // so we do not lose data
                        throw new DroppedSnapshotException("Failed clearing memory after " + flushCount + " attempts on region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
                    }
                } catch (IOException ioe) {
                    status.setStatus("Failed flush " + this + ", putting online again");
                    synchronized (writestate) {
                        writestate.writesEnabled = true;
                    }
                    // Have to throw to upper layers.  I can't abort server from here.
                    throw ioe;
                }
            }
        }
        // Result map is ordered by column family name bytes.
        Map<byte[], List<HStoreFile>> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        if (!stores.isEmpty()) {
            // initialize the thread pool for closing stores in parallel.
            ThreadPoolExecutor storeCloserThreadPool = getStoreOpenAndCloseThreadPool("StoreCloser-" + getRegionInfo().getRegionNameAsString());
            CompletionService<Pair<byte[], Collection<HStoreFile>>> completionService = new ExecutorCompletionService<>(storeCloserThreadPool);
            // close each store in parallel
            for (HStore store : stores.values()) {
                MemStoreSize mss = store.getFlushableSize();
                // Sanity check: when not aborting and not read-only, every store is expected to
                // have nothing left to flush. If one does, request a server abort (when region
                // server services are available); the store is still submitted for closing below.
                if (!(abort || mss.getDataSize() == 0 || writestate.readOnly)) {
                    if (getRegionServerServices() != null) {
                        getRegionServerServices().abort("Assertion failed while closing store " + getRegionInfo().getRegionNameAsString() + " " + store + ". flushableSize expected=0, actual={" + mss + "}. Current memStoreSize=" + this.memStoreSizing.getMemStoreSize() + ". Maybe a coprocessor " + "operation failed and left the memstore in a partially updated state.", null);
                    }
                }
                // Each task closes one store and yields (family name, files returned by close()).
                completionService.submit(new Callable<Pair<byte[], Collection<HStoreFile>>>() {

                    @Override
                    public Pair<byte[], Collection<HStoreFile>> call() throws IOException {
                        return new Pair<>(store.getColumnFamilyDescriptor().getName(), store.close());
                    }
                });
            }
            try {
                // Collect one result per store, in completion order, grouping files by family.
                for (int i = 0; i < stores.size(); i++) {
                    Future<Pair<byte[], Collection<HStoreFile>>> future = completionService.take();
                    Pair<byte[], Collection<HStoreFile>> storeFiles = future.get();
                    List<HStoreFile> familyFiles = result.get(storeFiles.getFirst());
                    if (familyFiles == null) {
                        familyFiles = new ArrayList<>();
                        result.put(storeFiles.getFirst(), familyFiles);
                    }
                    familyFiles.addAll(storeFiles.getSecond());
                }
            } catch (InterruptedException e) {
                throw throwOnInterrupt(e);
            } catch (ExecutionException e) {
                // Surface the store-close failure as an IOException, unwrapping it when it
                // already is one.
                Throwable cause = e.getCause();
                if (cause instanceof IOException) {
                    throw (IOException) cause;
                }
                throw new IOException(cause);
            } finally {
                storeCloserThreadPool.shutdownNow();
            }
        }
        status.setStatus("Writing region close event to WAL");
        // The close marker is written only when not aborting, a WAL and region server services
        // are present, and this is the default replica. Writing it is not a big problem: we
        // do not write any data into the region; it is just a meta edit in the WAL file.
        if (!abort && wal != null && getRegionServerServices() != null && RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
            writeRegionCloseMarker(wal);
        }
        this.closed.set(true);
        if (!canFlush) {
            // Nothing was flushed for this (read-only) region; pass the whole remaining memstore
            // size to decrMemStoreSize.
            decrMemStoreSize(this.memStoreSizing.getMemStoreSize());
        } else if (this.memStoreSizing.getDataSize() != 0) {
            // Flushing was allowed, so leftover data here is unexpected; it is only logged.
            LOG.error("Memstore data size is {} in region {}", this.memStoreSizing.getDataSize(), this);
        }
        if (coprocessorHost != null) {
            status.setStatus("Running coprocessor post-close hooks");
            this.coprocessorHost.postClose(abort);
        }
        if (this.metricsRegion != null) {
            this.metricsRegion.close();
        }
        if (this.metricsRegionWrapper != null) {
            Closeables.close(this.metricsRegionWrapper, true);
        }
        status.markComplete("Closed");
        LOG.info("Closed {}", this);
        return result;
    } finally {
        // Release the close (write) lock acquired before the try block.
        lock.writeLock().unlock();
    }
}
Also used : InterruptedIOException(java.io.InterruptedIOException) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) RegionReplicationSink(org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink) Pair(org.apache.hadoop.hbase.util.Pair) DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) TimeoutIOException(org.apache.hadoop.hbase.exceptions.TimeoutIOException) InterruptedIOException(java.io.InterruptedIOException) TreeMap(java.util.TreeMap) Collection(java.util.Collection) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)

Aggregations

ArrayList (java.util.ArrayList)2 RegionReplicationSink (org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink)2 IOException (java.io.IOException)1 InterruptedIOException (java.io.InterruptedIOException)1 Collection (java.util.Collection)1 List (java.util.List)1 TreeMap (java.util.TreeMap)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)1 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)1 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)1 DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException)1 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)1 TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor)1 TimeoutIOException (org.apache.hadoop.hbase.exceptions.TimeoutIOException)1 Pair (org.apache.hadoop.hbase.util.Pair)1