Use of org.apache.hadoop.hbase.DroppedSnapshotException in the Apache HBase project.
Class HRegion, method doClose.
/**
 * Closes this region: disables background writes, flushes the memstore (unless aborting or
 * read-only), closes every store in parallel and marks the region closed.
 *
 * @param abort when true the pre-flush, the final flush loop and the WAL close marker are all
 *          skipped; the flag is also handed to the coprocessor pre/post close hooks
 * @param status task monitor that is updated with progress messages as the close proceeds
 * @return map from column family name to the store files returned by closing that family's
 *         store, or {@code null} when the region was already closed (checked both before and
 *         after the write lock is taken)
 * @throws IOException if the write lock cannot be obtained within {@code timeoutForWriteLock}
 *           seconds, if the waiting thread is interrupted (as {@link InterruptedIOException}),
 *           if a flush fails or the memstore cannot be cleared (as
 *           {@link DroppedSnapshotException}), or if closing a store fails
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "UL_UNRELEASED_LOCK_EXCEPTION_PATH", justification = "I think FindBugs is confused")
private Map<byte[], List<StoreFile>> doClose(final boolean abort, MonitoredTask status) throws IOException {
// Fast path: nothing to do when the region is already closed.
if (isClosed()) {
LOG.warn("Region " + this + " already closed");
return null;
}
if (coprocessorHost != null) {
status.setStatus("Running coprocessor pre-close hooks");
this.coprocessorHost.preClose(abort);
}
status.setStatus("Disabling compacts and flushes for region");
boolean canFlush = true;
synchronized (writestate) {
// Disable compacting and flushing by background threads for this
// region.
// A read-only region is never flushed by this method.
canFlush = !writestate.readOnly;
writestate.writesEnabled = false;
LOG.debug("Closing " + this + ": disabling compactions & flushes");
// NOTE(review): presumably blocks until in-flight flushes/compactions finish -- confirm.
waitForFlushesAndCompactions();
}
// Unless aborting, run a preliminary flush (when worthPreFlushing() says so) before the
// write lock is taken below; a failure here is recorded on the status and otherwise ignored.
if (!abort && worthPreFlushing() && canFlush) {
status.setStatus("Pre-flushing region before close");
LOG.info("Running close preflush of " + getRegionInfo().getRegionNameAsString());
try {
internalFlushcache(status);
} catch (IOException ioe) {
// Failed to flush the region. Keep going.
status.setStatus("Failed pre-flush " + this + "; " + ioe.getMessage());
}
}
// Take the region write lock: wait indefinitely when no finite timeout is configured,
// otherwise wait at most timeoutForWriteLock seconds and fail the close on timeout.
if (timeoutForWriteLock == null || timeoutForWriteLock == Long.MAX_VALUE) {
// block waiting for the lock for closing
// FindBugs: Complains UL_UNRELEASED_LOCK_EXCEPTION_PATH but seems fine
lock.writeLock().lock();
} else {
try {
boolean succeed = lock.writeLock().tryLock(timeoutForWriteLock, TimeUnit.SECONDS);
if (!succeed) {
throw new IOException("Failed to get write lock when closing region");
}
} catch (InterruptedException e) {
// Surface the interruption as an IOException subtype, keeping the original as the cause.
throw (InterruptedIOException) new InterruptedIOException().initCause(e);
}
}
this.closing.set(true);
status.setStatus("Disabling writes for close");
// Everything below runs with the write lock held; the finally block releases it.
try {
// Re-check under the lock: the region may have been closed while we waited for it.
if (this.isClosed()) {
status.abort("Already got closed by another process");
// SplitTransaction handles the null
return null;
}
LOG.debug("Updates disabled for region " + this);
// Don't flush the cache if we are aborting
if (!abort && canFlush) {
// Counts flushes after which the memstore data size did not decrease.
int failedfFlushCount = 0;
int flushCount = 0;
long tmp = 0;
long remainingSize = this.memstoreDataSize.get();
// Keep flushing until the memstore data size reaches zero.
while (remainingSize > 0) {
try {
internalFlushcache(status);
if (flushCount > 0) {
LOG.info("Running extra flush, " + flushCount + " (carrying snapshot?) " + this);
}
flushCount++;
tmp = this.memstoreDataSize.get();
if (tmp >= remainingSize) {
failedfFlushCount++;
}
remainingSize = tmp;
if (failedfFlushCount > 5) {
// The memstore failed to shrink on more than 5 flushes; give up by throwing
// so we do not lose data
throw new DroppedSnapshotException("Failed clearing memory after " + flushCount + " attempts on region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
}
} catch (IOException ioe) {
// Also reached for the DroppedSnapshotException thrown just above.
status.setStatus("Failed flush " + this + ", putting online again");
synchronized (writestate) {
writestate.writesEnabled = true;
}
// Have to throw to upper layers. I can't abort server from here.
throw ioe;
}
}
}
// Collected store files, keyed by column family name.
Map<byte[], List<StoreFile>> result = new TreeMap<>(Bytes.BYTES_COMPARATOR);
if (!stores.isEmpty()) {
// initialize the thread pool for closing stores in parallel.
ThreadPoolExecutor storeCloserThreadPool = getStoreOpenAndCloseThreadPool("StoreCloserThread-" + getRegionInfo().getRegionNameAsString());
CompletionService<Pair<byte[], Collection<StoreFile>>> completionService = new ExecutorCompletionService<>(storeCloserThreadPool);
// close each store in parallel
for (final Store store : stores.values()) {
MemstoreSize flushableSize = store.getSizeToFlush();
// Sanity check: outside of abort/read-only, a store should have nothing left to flush
// at this point; if it does, ask the region server (when available) to abort.
if (!(abort || flushableSize.getDataSize() == 0 || writestate.readOnly)) {
if (getRegionServerServices() != null) {
getRegionServerServices().abort("Assertion failed while closing store " + getRegionInfo().getRegionNameAsString() + " " + store + ". flushableSize expected=0, actual= " + flushableSize + ". Current memstoreSize=" + getMemstoreSize() + ". Maybe a coprocessor " + "operation failed and left the memstore in a partially updated state.", null);
}
}
completionService.submit(new Callable<Pair<byte[], Collection<StoreFile>>>() {
@Override
public Pair<byte[], Collection<StoreFile>> call() throws IOException {
return new Pair<>(store.getFamily().getName(), store.close());
}
});
}
try {
// Take one completed task per store and merge its files into the per-family list.
for (int i = 0; i < stores.size(); i++) {
Future<Pair<byte[], Collection<StoreFile>>> future = completionService.take();
Pair<byte[], Collection<StoreFile>> storeFiles = future.get();
List<StoreFile> familyFiles = result.get(storeFiles.getFirst());
if (familyFiles == null) {
familyFiles = new ArrayList<>();
result.put(storeFiles.getFirst(), familyFiles);
}
familyFiles.addAll(storeFiles.getSecond());
}
} catch (InterruptedException e) {
throw (InterruptedIOException) new InterruptedIOException().initCause(e);
} catch (ExecutionException e) {
// Unwrap the task failure: rethrow IOExceptions as-is, wrap anything else.
Throwable cause = e.getCause();
if (cause instanceof IOException) {
throw (IOException) cause;
}
throw new IOException(cause);
} finally {
storeCloserThreadPool.shutdownNow();
}
}
status.setStatus("Writing region close event to WAL");
// The close marker is only written for a non-abort close of a writable region that has
// both a WAL and region server services.
if (!abort && wal != null && getRegionServerServices() != null && !writestate.readOnly) {
writeRegionCloseMarker(wal);
}
this.closed.set(true);
// When no flush was allowed, subtract whatever is still accounted to the memstore;
// otherwise a non-zero size after the flush loop is unexpected and is logged as an error.
if (!canFlush) {
this.decrMemstoreSize(new MemstoreSize(memstoreDataSize.get(), getMemstoreHeapSize()));
} else if (memstoreDataSize.get() != 0) {
LOG.error("Memstore size is " + memstoreDataSize.get());
}
if (coprocessorHost != null) {
status.setStatus("Running coprocessor post-close hooks");
this.coprocessorHost.postClose(abort);
}
if (this.metricsRegion != null) {
this.metricsRegion.close();
}
if (this.metricsRegionWrapper != null) {
Closeables.closeQuietly(this.metricsRegionWrapper);
}
// stop the Compacted hfile discharger
if (this.compactedFileDischarger != null)
this.compactedFileDischarger.cancel(true);
status.markComplete("Closed");
LOG.info("Closed " + this);
return result;
} finally {
lock.writeLock().unlock();
}
}
Use of org.apache.hadoop.hbase.DroppedSnapshotException in the Apache HBase project.
Class HRegion, method internalFlushCacheAndCommit.
/**
 * Second half of a memstore flush: writes the prepared snapshots out, commits the resulting
 * files into their stores, shrinks the memstore accounting and records the outcome in the WAL.
 *
 * <p>Any failure during flush/commit is treated as fatal: an ABORT_FLUSH marker is attempted,
 * the WAL cache flush is aborted, the region is flagged as closing, the region server (when
 * present) is asked to abort, and a {@link DroppedSnapshotException} is thrown.
 *
 * @param wal WAL to write flush markers to; may be {@code null}, in which case no markers are
 *          written
 * @param status task monitor updated with progress and the final summary message
 * @param prepareResult state produced by the prepare phase (flush contexts, sequence ids,
 *          start time and flushable sizes)
 * @param storesToFlush the stores being flushed; iterated in lock-step with the flush contexts
 * @return a result carrying the flush sequence id and whether any store asked for compaction
 * @throws IOException always a {@link DroppedSnapshotException} wrapping the original cause
 *           when flushing, committing or writing the COMMIT_FLUSH marker fails
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NN_NAKED_NOTIFY", justification = "Intentional; notify is about completed flush")
protected FlushResult internalFlushCacheAndCommit(final WAL wal, MonitoredTask status, final PrepareFlushResult prepareResult, final Collection<Store> storesToFlush) throws IOException {
  // prepare flush context is carried via PrepareFlushResult
  TreeMap<byte[], StoreFlushContext> storeFlushCtxs = prepareResult.storeFlushCtxs;
  TreeMap<byte[], List<Path>> committedFiles = prepareResult.committedFiles;
  long startTime = prepareResult.startTime;
  long flushOpSeqId = prepareResult.flushOpSeqId;
  long flushedSeqId = prepareResult.flushedSeqId;
  String s = "Flushing stores of " + this;
  status.setStatus(s);
  if (LOG.isTraceEnabled()) {
    LOG.trace(s);
  }
  // Any failure from here on out will be catastrophic requiring server
  // restart so wal content can be replayed and put back into the memstore.
  // Otherwise, the snapshot content while backed up in the wal, it will not
  // be part of the current running servers state.
  boolean compactionRequested = false;
  long flushedOutputFileSize = 0;
  try {
    // Phase 1: write every prepared snapshot out.
    for (StoreFlushContext flush : storeFlushCtxs.values()) {
      flush.flushCache(status);
    }
    // Phase 2: switch snapshot (in memstore) -> new hfile (thus causing
    // all the store scanners to reset/reseek).
    Iterator<Store> it = storesToFlush.iterator();
    // storesToFlush and storeFlushCtxs are assumed to iterate in the same order
    for (StoreFlushContext flush : storeFlushCtxs.values()) {
      boolean needsCompaction = flush.commit(status);
      if (needsCompaction) {
        compactionRequested = true;
      }
      byte[] storeName = it.next().getFamily().getName();
      List<Path> storeCommittedFiles = flush.getCommittedFiles();
      committedFiles.put(storeName, storeCommittedFiles);
      // Flush committed no files, indicating flush is empty or flush was canceled;
      // that store's share must not be subtracted from the memstore below.
      if (storeCommittedFiles == null || storeCommittedFiles.isEmpty()) {
        MemstoreSize storeFlushableSize = prepareResult.storeFlushableSize.get(storeName);
        prepareResult.totalFlushableSize.decMemstoreSize(storeFlushableSize);
      }
      flushedOutputFileSize += flush.getOutputFileSize();
    }
    storeFlushCtxs.clear();
    // Set down the memstore size by amount of flush.
    this.decrMemstoreSize(prepareResult.totalFlushableSize);
    if (wal != null) {
      // write flush marker to WAL. If fail, we should throw DroppedSnapshotException
      FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
      WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, true, mvcc);
    }
  } catch (Throwable t) {
    // Deliberately catches everything: whatever went wrong, the snapshot may not have been
    // persisted, so the failure is escalated as a DroppedSnapshotException below.
    if (wal != null) {
      try {
        FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
        WALUtil.writeFlushMarker(wal, this.replicationScope, getRegionInfo(), desc, false, mvcc);
      } catch (Throwable ex) {
        LOG.warn(getRegionInfo().getEncodedName() + " : " + "failed writing ABORT_FLUSH marker to WAL", ex);
        // ignore this since we will be aborting the RS with DSE.
      }
      wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
    }
    DroppedSnapshotException dse = new DroppedSnapshotException("region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()));
    dse.initCause(t);
    status.abort("Flush failed: " + StringUtils.stringifyException(t));
    // Callers for flushcache() should catch DroppedSnapshotException and abort the region server.
    // However, since we may have the region read lock, we cannot call close(true) here since
    // we cannot promote to a write lock. Instead we are setting closing so that all other region
    // operations except for close will be rejected.
    this.closing.set(true);
    if (rsServices != null) {
      // This is a safeguard against the case where the caller fails to explicitly handle aborting
      rsServices.abort("Replay of WAL required. Forcing server shutdown", dse);
    }
    throw dse;
  }
  // If we get to here, the HStores have been written.
  if (wal != null) {
    wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
  }
  // Record latest flush time
  for (Store store : storesToFlush) {
    this.lastStoreFlushTimeMap.put(store, startTime);
  }
  this.maxFlushedSeqId = flushedSeqId;
  this.lastFlushOpSeqId = flushOpSeqId;
  // Wake up any thread waiting on this region's monitor for the flush to complete.
  synchronized (this) {
    // FindBugs NN_NAKED_NOTIFY
    notifyAll();
  }
  // Elapsed wall-clock time of the whole flush, in the units of currentTime() (reported as ms).
  long time = EnvironmentEdgeManager.currentTime() - startTime;
  long memstoresize = this.memstoreDataSize.get();
  String msg = "Finished memstore flush of ~" + StringUtils.byteDesc(prepareResult.totalFlushableSize.getDataSize()) + "/" + prepareResult.totalFlushableSize.getDataSize() + ", currentsize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + " for region " + this + " in " + time + "ms, sequenceid=" + flushOpSeqId + ", compaction requested=" + compactionRequested + ((wal == null) ? "; wal=null" : "");
  LOG.info(msg);
  status.setStatus(msg);
  if (rsServices != null && rsServices.getMetrics() != null) {
    // 'time' is already the elapsed duration; subtracting startTime again would report a
    // large negative value to the metrics.
    rsServices.getMetrics().updateFlush(time, prepareResult.totalFlushableSize.getDataSize(), flushedOutputFileSize);
  }
  return new FlushResultImpl(compactionRequested ? FlushResult.Result.FLUSHED_COMPACTION_NEEDED : FlushResult.Result.FLUSHED_NO_COMPACTION_NEEDED, flushOpSeqId);
}
Aggregations