
Example 11 with DroppedSnapshotException

Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

From class HRegion, method doClose:

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "UL_UNRELEASED_LOCK_EXCEPTION_PATH", justification = "I think FindBugs is confused")
private Map<byte[], List<StoreFile>> doClose(final boolean abort, MonitoredTask status) throws IOException {
    if (isClosed()) {
        LOG.warn("Region " + this + " already closed");
        return null;
    }
    if (coprocessorHost != null) {
        status.setStatus("Running coprocessor pre-close hooks");
        this.coprocessorHost.preClose(abort);
    }
    status.setStatus("Disabling compacts and flushes for region");
    boolean canFlush = true;
    synchronized (writestate) {
        // Disable compacting and flushing by background threads for this
        // region.
        canFlush = !writestate.readOnly;
        writestate.writesEnabled = false;
        LOG.debug("Closing " + this + ": disabling compactions & flushes");
        waitForFlushesAndCompactions();
    }
    // If we were not just flushing, is it worth doing a preflush, one that
    // clears out the bulk of the memstore before we put up the close flag?
    if (!abort && worthPreFlushing() && canFlush) {
        status.setStatus("Pre-flushing region before close");
        LOG.info("Running close preflush of " + getRegionInfo().getRegionNameAsString());
        try {
            internalFlushcache(status);
        } catch (IOException ioe) {
            // Failed to flush the region. Keep going.
            status.setStatus("Failed pre-flush " + this + "; " + ioe.getMessage());
        }
    }
    if (timeoutForWriteLock == null || timeoutForWriteLock == Long.MAX_VALUE) {
        // block waiting for the lock for closing
        // FindBugs: Complains UL_UNRELEASED_LOCK_EXCEPTION_PATH but seems fine
        lock.writeLock().lock();
    } else {
        try {
            boolean succeed = lock.writeLock().tryLock(timeoutForWriteLock, TimeUnit.SECONDS);
            if (!succeed) {
                throw new IOException("Failed to get write lock when closing region");
            }
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        }
    }
    this.closing.set(true);
    status.setStatus("Disabling writes for close");
    try {
        if (this.isClosed()) {
            status.abort("Already got closed by another process");
            // SplitTransaction handles the null
            return null;
        }
        LOG.debug("Updates disabled for region " + this);
        // Don't flush the cache if we are aborting
        if (!abort && canFlush) {
            int failedFlushCount = 0;
            int flushCount = 0;
            long tmp = 0;
            long remainingSize = this.memstoreDataSize.get();
            while (remainingSize > 0) {
                try {
                    internalFlushcache(status);
                    if (flushCount > 0) {
                        LOG.info("Running extra flush, " + flushCount + " (carrying snapshot?) " + this);
                    }
                    flushCount++;
                    tmp = this.memstoreDataSize.get();
                    if (tmp >= remainingSize) {
                        failedFlushCount++; // no progress on this attempt
                    }
                    remainingSize = tmp;
                    if (failedFlushCount > 5) {
                        // If repeated flushes cannot clear the memstore, give up and throw
                        // so we do not lose data
                        throw new DroppedSnapshotException("Failed clearing memory after " + flushCount + " attempts on region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
                    }
                } catch (IOException ioe) {
                    status.setStatus("Failed flush " + this + ", putting online again");
                    synchronized (writestate) {
                        writestate.writesEnabled = true;
                    }
                    // Have to throw to upper layers.  I can't abort server from here.
                    throw ioe;
                }
            }
        }
        Map<byte[], List<StoreFile>> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        if (!stores.isEmpty()) {
            // initialize the thread pool for closing stores in parallel.
            ThreadPoolExecutor storeCloserThreadPool = getStoreOpenAndCloseThreadPool("StoreCloserThread-" + getRegionInfo().getRegionNameAsString());
            CompletionService<Pair<byte[], Collection<StoreFile>>> completionService = new ExecutorCompletionService<>(storeCloserThreadPool);
            // close each store in parallel
            for (final Store store : stores.values()) {
                MemstoreSize flushableSize = store.getSizeToFlush();
                if (!(abort || flushableSize.getDataSize() == 0 || writestate.readOnly)) {
                    if (getRegionServerServices() != null) {
                        getRegionServerServices().abort("Assertion failed while closing store " + getRegionInfo().getRegionNameAsString() + " " + store + ". flushableSize expected=0, actual= " + flushableSize + ". Current memstoreSize=" + getMemstoreSize() + ". Maybe a coprocessor " + "operation failed and left the memstore in a partially updated state.", null);
                    }
                }
                completionService.submit(new Callable<Pair<byte[], Collection<StoreFile>>>() {

                    @Override
                    public Pair<byte[], Collection<StoreFile>> call() throws IOException {
                        return new Pair<>(store.getFamily().getName(), store.close());
                    }
                });
            }
            try {
                for (int i = 0; i < stores.size(); i++) {
                    Future<Pair<byte[], Collection<StoreFile>>> future = completionService.take();
                    Pair<byte[], Collection<StoreFile>> storeFiles = future.get();
                    List<StoreFile> familyFiles = result.get(storeFiles.getFirst());
                    if (familyFiles == null) {
                        familyFiles = new ArrayList<>();
                        result.put(storeFiles.getFirst(), familyFiles);
                    }
                    familyFiles.addAll(storeFiles.getSecond());
                }
            } catch (InterruptedException e) {
                throw (InterruptedIOException) new InterruptedIOException().initCause(e);
            } catch (ExecutionException e) {
                Throwable cause = e.getCause();
                if (cause instanceof IOException) {
                    throw (IOException) cause;
                }
                throw new IOException(cause);
            } finally {
                storeCloserThreadPool.shutdownNow();
            }
        }
        status.setStatus("Writing region close event to WAL");
        if (!abort && wal != null && getRegionServerServices() != null && !writestate.readOnly) {
            writeRegionCloseMarker(wal);
        }
        this.closed.set(true);
        if (!canFlush) {
            this.decrMemstoreSize(new MemstoreSize(memstoreDataSize.get(), getMemstoreHeapSize()));
        } else if (memstoreDataSize.get() != 0) {
            LOG.error("Memstore size is " + memstoreDataSize.get());
        }
        if (coprocessorHost != null) {
            status.setStatus("Running coprocessor post-close hooks");
            this.coprocessorHost.postClose(abort);
        }
        if (this.metricsRegion != null) {
            this.metricsRegion.close();
        }
        if (this.metricsRegionWrapper != null) {
            Closeables.closeQuietly(this.metricsRegionWrapper);
        }
        // stop the Compacted hfile discharger
        if (this.compactedFileDischarger != null)
            this.compactedFileDischarger.cancel(true);
        status.markComplete("Closed");
        LOG.info("Closed " + this);
        return result;
    } finally {
        lock.writeLock().unlock();
    }
}
Also used: InterruptedIOException (java.io.InterruptedIOException), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), ArrayList (java.util.ArrayList), AbstractList (java.util.AbstractList), List (java.util.List), ExecutionException (java.util.concurrent.ExecutionException), Pair (org.apache.hadoop.hbase.util.Pair), DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException), IOException (java.io.IOException), MultipleIOException (org.apache.hadoop.io.MultipleIOException), DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException), TimeoutIOException (org.apache.hadoop.hbase.exceptions.TimeoutIOException), TreeMap (java.util.TreeMap), Collection (java.util.Collection), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)
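
The close path above keeps flushing until the memstore drains, counting flushes that fail to shrink it and throwing DroppedSnapshotException after too many unproductive attempts. Below is a minimal standalone sketch of that bounded-retry pattern; the MemstoreLike interface and the drainWithRetries helper are hypothetical stand-ins introduced only to isolate the loop logic from HRegion's internals.

import java.io.IOException;

// Hypothetical stand-in for the memstore being drained at close time.
interface MemstoreLike {
    long dataSize();                  // bytes still buffered in memory
    void flush() throws IOException;  // attempt to persist buffered data
}

final class FlushRetrySketch {
    /**
     * Flush until the memstore drains. Any flush that fails to shrink the
     * remaining size counts as a failure; after maxFailures unproductive
     * flushes we throw rather than risk silently losing buffered data,
     * mirroring the shape of the loop in HRegion.doClose.
     */
    static int drainWithRetries(MemstoreLike memstore, int maxFailures) throws IOException {
        int failedFlushCount = 0;
        int flushCount = 0;
        long remainingSize = memstore.dataSize();
        while (remainingSize > 0) {
            memstore.flush();
            flushCount++;
            long after = memstore.dataSize();
            if (after >= remainingSize) {
                failedFlushCount++; // this flush made no progress
            }
            remainingSize = after;
            if (failedFlushCount > maxFailures) {
                throw new IOException("Failed clearing memory after " + flushCount + " attempts");
            }
        }
        return flushCount;
    }
}

The key design choice is that failure is measured by lack of progress, not by exceptions alone: a flush that completes but leaves the memstore no smaller (for example, because a snapshot is still being carried) still counts against the retry budget.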

Example 12 with DroppedSnapshotException

Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

From class HRegion, method internalFlushCacheAndCommit:

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NN_NAKED_NOTIFY", justification = "Intentional; notify is about completed flush")
protected FlushResult internalFlushCacheAndCommit(final WAL wal, MonitoredTask status, final PrepareFlushResult prepareResult, final Collection<Store> storesToFlush) throws IOException {
    // prepare flush context is carried via PrepareFlushResult
    TreeMap<byte[], StoreFlushContext> storeFlushCtxs = prepareResult.storeFlushCtxs;
    TreeMap<byte[], List<Path>> committedFiles = prepareResult.committedFiles;
    long startTime = prepareResult.startTime;
    long flushOpSeqId = prepareResult.flushOpSeqId;
    long flushedSeqId = prepareResult.flushedSeqId;
    String s = "Flushing stores of " + this;
    status.setStatus(s);
    if (LOG.isTraceEnabled())
        LOG.trace(s);
    // Any failure from here on out will be catastrophic, requiring a server
    // restart so the WAL content can be replayed and put back into the memstore.
    // Otherwise the snapshot content, while backed up in the WAL, will not
    // be part of the running server's state.
    boolean compactionRequested = false;
    long flushedOutputFileSize = 0;
    try {
        for (StoreFlushContext flush : storeFlushCtxs.values()) {
            flush.flushCache(status);
        }
        // Switch snapshot (in memstore) -> new hfile (thus causing
        // all the store scanners to reset/reseek).
        Iterator<Store> it = storesToFlush.iterator();
        // stores.values() and storeFlushCtxs have same order
        for (StoreFlushContext flush : storeFlushCtxs.values()) {
            boolean needsCompaction = flush.commit(status);
            if (needsCompaction) {
                compactionRequested = true;
            }
            byte[] storeName = it.next().getFamily().getName();
            List<Path> storeCommittedFiles = flush.getCommittedFiles();
            committedFiles.put(storeName, storeCommittedFiles);
            // The flush committed no files, meaning the flush was empty or was canceled
            if (storeCommittedFiles == null || storeCommittedFiles.isEmpty()) {
                MemstoreSize storeFlushableSize = prepareResult.storeFlushableSize.get(storeName);
                prepareResult.totalFlushableSize.decMemstoreSize(storeFlushableSize);
            }
            flushedOutputFileSize += flush.getOutputFileSize();
        }
        storeFlushCtxs.clear();
        // Set down the memstore size by amount of flush.
        this.decrMemstoreSize(prepareResult.totalFlushableSize);
        if (wal != null) {
            // Write the flush marker to the WAL. If this fails, we should throw DroppedSnapshotException.
            FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
            WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, true, mvcc);
        }
    } catch (Throwable t) {
        // An exception here means the snapshot was not persisted and the WAL
        // must be replayed to restore the memstore, so catch all and sundry.
        if (wal != null) {
            try {
                FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
                WALUtil.writeFlushMarker(wal, this.replicationScope, getRegionInfo(), desc, false, mvcc);
            } catch (Throwable ex) {
                LOG.warn(getRegionInfo().getEncodedName() + " : " + "failed writing ABORT_FLUSH marker to WAL", ex);
            // ignore this since we will be aborting the RS with DSE.
            }
            wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
        }
        DroppedSnapshotException dse = new DroppedSnapshotException("region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
        dse.initCause(t);
        status.abort("Flush failed: " + StringUtils.stringifyException(t));
        // Callers for flushcache() should catch DroppedSnapshotException and abort the region server.
        // However, since we may have the region read lock, we cannot call close(true) here since
        // we cannot promote to a write lock. Instead we are setting closing so that all other region
        // operations except for close will be rejected.
        this.closing.set(true);
        if (rsServices != null) {
            // This is a safeguard against the case where the caller fails to explicitly handle aborting
            rsServices.abort("Replay of WAL required. Forcing server shutdown", dse);
        }
        throw dse;
    }
    // If we get to here, the HStores have been written.
    if (wal != null) {
        wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
    }
    // Record latest flush time
    for (Store store : storesToFlush) {
        this.lastStoreFlushTimeMap.put(store, startTime);
    }
    this.maxFlushedSeqId = flushedSeqId;
    this.lastFlushOpSeqId = flushOpSeqId;
    // Finally, notify anyone waiting on the memstore to clear, e.g. checkResources().
    synchronized (this) {
        // FindBugs NN_NAKED_NOTIFY
        notifyAll();
    }
    long time = EnvironmentEdgeManager.currentTime() - startTime;
    long memstoresize = this.memstoreDataSize.get();
    String msg = "Finished memstore flush of ~" + StringUtils.byteDesc(prepareResult.totalFlushableSize.getDataSize()) + "/" + prepareResult.totalFlushableSize.getDataSize() + ", currentsize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + " for region " + this + " in " + time + "ms, sequenceid=" + flushOpSeqId + ", compaction requested=" + compactionRequested + ((wal == null) ? "; wal=null" : "");
    LOG.info(msg);
    status.setStatus(msg);
    if (rsServices != null && rsServices.getMetrics() != null) {
        // time already holds the elapsed duration (see above); subtracting startTime again would skew the metric
        rsServices.getMetrics().updateFlush(time, prepareResult.totalFlushableSize.getDataSize(), flushedOutputFileSize);
    }
    return new FlushResultImpl(compactionRequested ? FlushResult.Result.FLUSHED_COMPACTION_NEEDED : FlushResult.Result.FLUSHED_NO_COMPACTION_NEEDED, flushOpSeqId);
}
Also used: Path (org.apache.hadoop.fs.Path), DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException), FlushDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor), StoreFlushDescriptor (org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor), ArrayList (java.util.ArrayList), AbstractList (java.util.AbstractList), List (java.util.List)
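
The catch block above states the caller-side contract: whoever drives the flush must catch DroppedSnapshotException and abort the region server so the WAL can be replayed. Here is a hedged sketch of such a caller; RegionLike and ServerLike are hypothetical stand-ins for HRegion and the hosting region server, introduced only to keep the sketch self-contained.

import java.io.IOException;
import org.apache.hadoop.hbase.DroppedSnapshotException;

final class FlushCallerSketch {
    // Hypothetical flushable region; stands in for HRegion's flush entry point.
    interface RegionLike {
        void flushcache() throws IOException;
    }

    // Hypothetical handle onto the hosting server; only abort() is needed here.
    interface ServerLike {
        void abort(String why, Throwable cause);
    }

    /** Returns true if the flush succeeded; aborts the server on a dropped snapshot. */
    static boolean flushRegion(RegionLike region, ServerLike server) throws IOException {
        try {
            region.flushcache();
            return true;
        } catch (DroppedSnapshotException dse) {
            // The snapshot was lost: its content now exists only in the WAL,
            // and only a server restart replays the WAL back into the memstore.
            // Swallowing this exception would silently lose data.
            server.abort("Replay of WAL required. Forcing server shutdown", dse);
            return false;
        }
    }
}

Note that internalFlushCacheAndCommit also sets closing and calls rsServices.abort() itself as a safeguard; the caller-side catch exists so the contract holds even when that safeguard is unavailable.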

Aggregations

DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException): 12 usages
IOException (java.io.IOException): 9 usages
InterruptedIOException (java.io.InterruptedIOException): 7 usages
Test (org.junit.Test): 7 usages
Put (org.apache.hadoop.hbase.client.Put): 6 usages
ArrayList (java.util.ArrayList): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 4 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
Path (org.apache.hadoop.fs.Path): 3 usages
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 3 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 3 usages
FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem): 3 usages
AbstractList (java.util.AbstractList): 2 usages
List (java.util.List): 2 usages
Cell (org.apache.hadoop.hbase.Cell): 2 usages
HBaseIOException (org.apache.hadoop.hbase.HBaseIOException): 2 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 2 usages
NotServingRegionException (org.apache.hadoop.hbase.NotServingRegionException): 2 usages
RegionTooBusyException (org.apache.hadoop.hbase.RegionTooBusyException): 2 usages
FailedSanityCheckException (org.apache.hadoop.hbase.exceptions.FailedSanityCheckException): 2 usages