Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor in project hbase by apache.
The class HRegion, method internalFlushCacheAndCommit.
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NN_NAKED_NOTIFY", justification = "Intentional; notify is about completed flush")
FlushResultImpl internalFlushCacheAndCommit(WAL wal, MonitoredTask status, PrepareFlushResult prepareResult, Collection<HStore> storesToFlush) throws IOException {
// prepare flush context is carried via PrepareFlushResult
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = prepareResult.storeFlushCtxs;
TreeMap<byte[], List<Path>> committedFiles = prepareResult.committedFiles;
long startTime = prepareResult.startTime;
long flushOpSeqId = prepareResult.flushOpSeqId;
long flushedSeqId = prepareResult.flushedSeqId;
String s = "Flushing stores of " + this;
status.setStatus(s);
if (LOG.isTraceEnabled()) {
LOG.trace(s);
}
// Any failure from here on out will be catastrophic, requiring a server
// restart so the WAL content can be replayed and put back into the memstore.
// Otherwise, the snapshot content, while backed up in the WAL, will not
// be part of the running server's state.
boolean compactionRequested = false;
long flushedOutputFileSize = 0;
try {
for (StoreFlushContext flush : storeFlushCtxs.values()) {
flush.flushCache(status);
}
// Switch snapshot (in memstore) -> new hfile (thus causing
// all the store scanners to reset/reseek).
for (Map.Entry<byte[], StoreFlushContext> flushEntry : storeFlushCtxs.entrySet()) {
StoreFlushContext sfc = flushEntry.getValue();
boolean needsCompaction = sfc.commit(status);
if (needsCompaction) {
compactionRequested = true;
}
byte[] storeName = flushEntry.getKey();
List<Path> storeCommittedFiles = sfc.getCommittedFiles();
committedFiles.put(storeName, storeCommittedFiles);
// The flush committed no files, indicating the flush was empty or was canceled.
if (storeCommittedFiles == null || storeCommittedFiles.isEmpty()) {
MemStoreSize storeFlushableSize = prepareResult.storeFlushableSize.get(storeName);
prepareResult.totalFlushableSize.decMemStoreSize(storeFlushableSize);
}
flushedOutputFileSize += sfc.getOutputFileSize();
}
storeFlushCtxs.clear();
// Decrease the memstore size by the amount flushed.
MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize();
this.decrMemStoreSize(mss);
// During startup, quota manager may not be initialized yet.
if (rsServices != null) {
RegionServerSpaceQuotaManager quotaManager = rsServices.getRegionServerSpaceQuotaManager();
if (quotaManager != null) {
quotaManager.getRegionSizeStore().incrementRegionSize(this.getRegionInfo(), flushedOutputFileSize);
}
}
if (wal != null) {
// Write the flush marker to the WAL. If this fails, we should throw DroppedSnapshotException.
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, true, mvcc, regionReplicationSink.orElse(null));
}
} catch (Throwable t) {
// An exception here means the snapshot was not persisted; the WAL must be replayed
// to restore the memstore content, which currently requires a server restart. We used
// to catch only IOExceptions, but other exceptions (e.g. HDFS errors) are possible,
// so we catch all and sundry.
if (wal != null) {
try {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.ABORT_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
WALUtil.writeFlushMarker(wal, this.replicationScope, getRegionInfo(), desc, false, mvcc, null);
} catch (Throwable ex) {
LOG.warn(getRegionInfo().getEncodedName() + " : " + "failed writing ABORT_FLUSH marker to WAL", ex);
// ignore this since we will be aborting the RS with DSE.
}
wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
}
DroppedSnapshotException dse = new DroppedSnapshotException("region: " + Bytes.toStringBinary(getRegionInfo().getRegionName()), t);
status.abort("Flush failed: " + StringUtils.stringifyException(t));
// Callers for flushcache() should catch DroppedSnapshotException and abort the region server.
// However, since we may have the region read lock, we cannot call close(true) here since
// we cannot promote to a write lock. Instead we are setting closing so that all other region
// operations except for close will be rejected.
this.closing.set(true);
if (rsServices != null) {
// This is a safeguard against the case where the caller fails to explicitly handle aborting
rsServices.abort("Replay of WAL required. Forcing server shutdown", dse);
}
throw dse;
}
// If we get to here, the HStores have been written.
if (wal != null) {
wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes(), flushedSeqId);
}
// Record latest flush time
for (HStore store : storesToFlush) {
this.lastStoreFlushTimeMap.put(store, startTime);
}
this.maxFlushedSeqId = flushedSeqId;
this.lastFlushOpSeqId = flushOpSeqId;
// Finally, notify anyone waiting on the memstore to clear, e.g. checkResources().
synchronized (this) {
// FindBugs NN_NAKED_NOTIFY
notifyAll();
}
long time = EnvironmentEdgeManager.currentTime() - startTime;
MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize();
long memstoresize = this.memStoreSizing.getMemStoreSize().getDataSize();
String msg = "Finished flush of" + " dataSize ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize() + ", heapSize ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize() + ", currentSize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + " for " + this.getRegionInfo().getEncodedName() + " in " + time + "ms, sequenceid=" + flushOpSeqId + ", compaction requested=" + compactionRequested + ((wal == null) ? "; wal=null" : "");
LOG.info(msg);
status.setStatus(msg);
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateFlush(getTableDescriptor().getTableName().getNameAsString(), time, mss.getDataSize(), flushedOutputFileSize);
}
return new FlushResultImpl(compactionRequested ? FlushResult.Result.FLUSHED_COMPACTION_NEEDED : FlushResult.Result.FLUSHED_NO_COMPACTION_NEEDED, flushOpSeqId);
}
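As the comment in the catch block above notes, callers of flushcache() are expected to catch DroppedSnapshotException and abort the region server. A minimal, hedged sketch of that caller-side contract follows (simplified; the real callers do considerably more bookkeeping, and the server handle here is assumed to be the region server services):
// Illustrative only: caller-side handling of the DroppedSnapshotException contract.
try {
  region.flushcache(true, true, FlushLifeCycleTracker.DUMMY);
} catch (DroppedSnapshotException dse) {
  // The snapshot was not persisted; WAL replay is required to restore the memstore,
  // so the only safe reaction is to abort the region server.
  server.abort("Replay of WAL required. Forcing server shutdown", dse);
} catch (IOException ioe) {
  LOG.warn("Flush failed for " + region.getRegionInfo().getEncodedName(), ioe);
}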
Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor in project hbase by apache.
The class HRegion, method internalPrepareFlushCache.
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DLS_DEAD_LOCAL_STORE", justification = "FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(WAL wal, long myseqid, Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker, FlushLifeCycleTracker tracker) throws IOException {
if (this.rsServices != null && this.rsServices.isAborted()) {
// Don't flush when the server is aborting; it's unsafe.
throw new IOException("Aborting flush because server is aborted...");
}
final long startTime = EnvironmentEdgeManager.currentTime();
// If there is nothing to flush, return early, but still return a valid, unused sequence id
// (useful as a marker when bulk loading, etc.), which is why we may still have to go get one.
if (this.memStoreSizing.getDataSize() <= 0) {
// Take an update lock so no edits can come into memory just yet.
this.updatesLock.writeLock().lock();
WriteEntry writeEntry = null;
try {
if (this.memStoreSizing.getDataSize() <= 0) {
// Still nothing in the memstore; record a flush request marker with a fresh, unused
// sequence id (useful as a marker when bulk loading, etc.).
if (wal != null) {
writeEntry = mvcc.begin();
long flushOpSeqId = writeEntry.getWriteNumber();
FlushResultImpl flushResult = new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
mvcc.completeAndWait(writeEntry);
// Set to null so we don't complete it again down in finally block.
writeEntry = null;
return new PrepareFlushResult(flushResult, myseqid);
} else {
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
}
}
} finally {
if (writeEntry != null) {
// If writeEntry is non-null, this operation failed; the mvcc transaction failed...
// but complete it anyways so it doesn't block the mvcc queue.
mvcc.complete(writeEntry);
}
this.updatesLock.writeLock().unlock();
}
}
logFatLineOnFlush(storesToFlush, myseqid);
// Stop updates while we snapshot the memstores of all of this region's stores. We only have
// to do this for a moment; it is quick. We also set the memstore size to zero here before we
// allow updates again, so its value will represent the size of the updates received
// during the flush.
// We have to take an update lock during the snapshot, or else a write could end up in both the
// snapshot and the memstore (which would make atomic rows difficult to guarantee).
status.setStatus("Obtaining lock to block concurrent updates");
// block waiting for the lock for internal flush
this.updatesLock.writeLock().lock();
status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
MemStoreSizing totalSizeOfFlushableStores = new NonThreadSafeMemStoreSizing();
Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
for (HStore store : storesToFlush) {
flushedFamilyNamesToSeq.put(store.getColumnFamilyDescriptor().getName(), store.preFlushSeqIDEstimation());
}
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], MemStoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
// The sequence id of this flush operation which is used to log FlushMarker and pass to
// createFlushContext to use as the store file's sequence id. It can be in advance of edits
// still in the memstore, edits that are in other column families yet to be flushed.
long flushOpSeqId = HConstants.NO_SEQNUM;
// The max flushed sequence id after this flush operation completes. All edits in memstore
// will be in advance of this sequence id.
long flushedSeqId = HConstants.NO_SEQNUM;
byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
try {
if (wal != null) {
Long earliestUnflushedSequenceIdForTheRegion = wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
if (earliestUnflushedSequenceIdForTheRegion == null) {
// This should never happen. This is how startCacheFlush signals flush cannot proceed.
String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
status.setStatus(msg);
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false), myseqid);
}
flushOpSeqId = getNextSequenceId(wal);
// Back up 1, minus 1 from oldest sequence id in memstore to get last 'flushed' edit
flushedSeqId = earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM ? flushOpSeqId : earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
} else {
// use the provided sequence Id as WAL is not being used for this flush.
flushedSeqId = flushOpSeqId = myseqid;
}
for (HStore s : storesToFlush) {
storeFlushCtxs.put(s.getColumnFamilyDescriptor().getName(), s.createFlushContext(flushOpSeqId, tracker));
// for writing stores to WAL
committedFiles.put(s.getColumnFamilyDescriptor().getName(), null);
}
// write the snapshot start to WAL
if (wal != null && !writestate.readOnly) {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
// No sync here. The sync happens below, where we no longer hold the updates lock and we write FlushAction.COMMIT_FLUSH.
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false, mvcc, regionReplicationSink.orElse(null));
}
// Prepare flush (take a snapshot)
storeFlushCtxs.forEach((name, flush) -> {
MemStoreSize snapshotSize = flush.prepare();
totalSizeOfFlushableStores.incMemStoreSize(snapshotSize);
storeFlushableSize.put(name, snapshotSize);
});
} catch (IOException ex) {
doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
throw ex;
} finally {
this.updatesLock.writeLock().unlock();
}
String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " + "flushsize=" + totalSizeOfFlushableStores;
status.setStatus(s);
doSyncOfUnflushedWALChanges(wal, getRegionInfo());
return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
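These two methods are the prepare and commit halves of a flush. A condensed, hedged sketch of how they are typically wired together, modeled on HRegion.internalFlushcache (simplified; not the exact production code):
// Simplified sketch of the two-phase flush, modeled on HRegion.internalFlushcache.
protected FlushResultImpl internalFlushcache(WAL wal, long myseqid,
    Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker,
    FlushLifeCycleTracker tracker) throws IOException {
  // Phase 1: snapshot the memstores and write the START_FLUSH marker.
  PrepareFlushResult result =
    internalPrepareFlushCache(wal, myseqid, storesToFlush, status, writeFlushWalMarker, tracker);
  if (result.result == null) {
    // Phase 2: write the snapshots out as store files and log COMMIT_FLUSH.
    return internalFlushCacheAndCommit(wal, status, result, storesToFlush);
  } else {
    // Prepare already produced a terminal result (e.g. CANNOT_FLUSH_MEMSTORE_EMPTY).
    return result.result;
  }
}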
Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor in project hbase by apache.
The class ProtobufUtil, method toFlushDescriptor.
public static FlushDescriptor toFlushDescriptor(FlushAction action, org.apache.hadoop.hbase.client.RegionInfo hri, long flushSeqId, Map<byte[], List<Path>> committedFiles) {
FlushDescriptor.Builder desc = FlushDescriptor.newBuilder().setAction(action).setEncodedRegionName(UnsafeByteOperations.unsafeWrap(hri.getEncodedNameAsBytes())).setRegionName(UnsafeByteOperations.unsafeWrap(hri.getRegionName())).setFlushSequenceNumber(flushSeqId).setTableName(UnsafeByteOperations.unsafeWrap(hri.getTable().getName()));
for (Map.Entry<byte[], List<Path>> entry : committedFiles.entrySet()) {
WALProtos.FlushDescriptor.StoreFlushDescriptor.Builder builder = WALProtos.FlushDescriptor.StoreFlushDescriptor.newBuilder().setFamilyName(UnsafeByteOperations.unsafeWrap(entry.getKey())).setStoreHomeDir(Bytes.toString(entry.getKey())); // store home dir is relative to the region
if (entry.getValue() != null) {
for (Path path : entry.getValue()) {
builder.addFlushOutput(path.getName());
}
}
desc.addStoreFlushes(builder);
}
return desc.build();
}
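The returned FlushDescriptor can be read back through the standard protobuf-generated accessors for the fields populated above. The following is a hedged illustration of how a consumer (for example a replica replay path) might inspect it; accessor names follow the usual protobuf-java conventions for the setters shown, and the surrounding variables (regionInfo, flushSeqId, committedFiles) are assumed to be in scope:
// Illustrative only: reading back a FlushDescriptor built by toFlushDescriptor.
FlushDescriptor desc =
  ProtobufUtil.toFlushDescriptor(FlushAction.COMMIT_FLUSH, regionInfo, flushSeqId, committedFiles);
if (desc.getAction() == FlushAction.COMMIT_FLUSH) {
  long seqId = desc.getFlushSequenceNumber();
  for (WALProtos.FlushDescriptor.StoreFlushDescriptor store : desc.getStoreFlushesList()) {
    String family = Bytes.toString(store.getFamilyName().toByteArray());
    // flush_output holds the store file names added via addFlushOutput(path.getName()).
    for (String fileName : store.getFlushOutputList()) {
      // e.g. locate the file under store.getStoreHomeDir() and pick it up on the replica
    }
  }
}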
Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor in project hbase by apache.
The class TestHRegionReplayEvents, method testReplayingFlushRequestRestoresReadsEnabledState.
/**
* Test the case where the secondary region replica is not in the reads-enabled state because it
* is waiting for a flush or region open marker from the primary region. Replaying a CANNOT_FLUSH
* flush marker entry should restore the reads-enabled status in the region and allow reads to
* continue.
*/
@Test
public void testReplayingFlushRequestRestoresReadsEnabledState() throws IOException {
disableReads(secondaryRegion);
// Test case 1: Test that replaying a CANNOT_FLUSH request marker (assuming it came from a
// triggered flush) restores readsEnabled.
primaryRegion.flushcache(true, true, FlushLifeCycleTracker.DUMMY);
reader = createWALReaderForPrimary();
while (true) {
WAL.Entry entry = reader.next();
if (entry == null) {
break;
}
FlushDescriptor flush = WALEdit.getFlushDescriptor(entry.getEdit().getCells().get(0));
if (flush != null) {
secondaryRegion.replayWALFlushMarker(flush, entry.getKey().getSequenceId());
}
}
// now reads should be enabled
secondaryRegion.get(new Get(Bytes.toBytes(0)));
}
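For context, here is a hedged sketch of what the disabled state looks like before the marker is replayed. It assumes the replica rejects reads with an IOException while it is not reads-enabled (the exact exception type and message are implementation details, not asserted by this test):
// Illustrative only: a read against the secondary before the CANNOT_FLUSH marker is replayed.
try {
  secondaryRegion.get(new Get(Bytes.toBytes(0)));
  fail("Expected the read to be rejected while the replica is not reads-enabled");
} catch (IOException expected) {
  // expected: the replica is still waiting for a flush or region open marker
}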
Use of org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor in project hbase by apache.
The class TestHRegionReplayEvents, method testReplayFlushCommitMarkerSmallerThanFlushStartMarker.
/**
* Tests the case where we prepare a flush with some seqId and then receive a flush commit marker
* whose seqId is smaller than that of the previous flush start marker.
*/
@Test
public void testReplayFlushCommitMarkerSmallerThanFlushStartMarker() throws IOException {
// Load some data into the primary and flush: 2 flushes, plus some more unflushed data.
putDataWithFlushes(primaryRegion, 100, 200, 100);
int numRows = 300;
// now replay the edits and the flush marker
reader = createWALReaderForPrimary();
LOG.info("-- Replaying edits and flush events in secondary");
FlushDescriptor startFlushDesc = null;
FlushDescriptor commitFlushDesc = null;
int lastReplayed = 0;
while (true) {
System.out.println(lastReplayed);
WAL.Entry entry = reader.next();
if (entry == null) {
break;
}
FlushDescriptor flushDesc = WALEdit.getFlushDescriptor(entry.getEdit().getCells().get(0));
if (flushDesc != null) {
if (flushDesc.getAction() == FlushAction.START_FLUSH) {
// don't replay the first flush start marker, hold on to it, replay the second one
if (startFlushDesc == null) {
startFlushDesc = flushDesc;
} else {
LOG.info("-- Replaying flush start in secondary");
startFlushDesc = flushDesc;
PrepareFlushResult result = secondaryRegion.replayWALFlushStartMarker(startFlushDesc);
assertNull(result.result);
}
} else if (flushDesc.getAction() == FlushAction.COMMIT_FLUSH) {
// do not replay any flush commit yet
if (commitFlushDesc == null) {
// hold on to the first flush commit marker
commitFlushDesc = flushDesc;
}
}
// after replay verify that everything is still visible
verifyData(secondaryRegion, 0, lastReplayed + 1, cq, families);
} else {
lastReplayed = replayEdit(secondaryRegion, entry);
}
}
// at this point, there should be some data (rows 0-200) in memstore snapshot
// and some more data in memstores (rows 200-300)
verifyData(secondaryRegion, 0, numRows, cq, families);
// no store files in the region
int expectedStoreFileCount = 0;
for (HStore s : secondaryRegion.getStores()) {
assertEquals(expectedStoreFileCount, s.getStorefilesCount());
}
long regionMemstoreSize = secondaryRegion.getMemStoreDataSize();
// Test case 1: replay a flush commit marker smaller than what we have prepared
LOG.info("Testing replaying flush COMMIT " + commitFlushDesc + " on top of flush START " + startFlushDesc);
assertTrue(commitFlushDesc.getFlushSequenceNumber() < startFlushDesc.getFlushSequenceNumber());
LOG.info("-- Replaying flush commit in secondary" + commitFlushDesc);
secondaryRegion.replayWALFlushCommitMarker(commitFlushDesc);
// assert that the flushed files are picked up
expectedStoreFileCount++;
for (HStore s : secondaryRegion.getStores()) {
assertEquals(expectedStoreFileCount, s.getStorefilesCount());
}
HStore store = secondaryRegion.getStore(Bytes.toBytes("cf1"));
MemStoreSize mss = store.getFlushableSize();
// assert that the memstore is not dropped
assertTrue(mss.getHeapSize() > 0);
// assert that the region memstore size is the same as before
long newRegionMemstoreSize = secondaryRegion.getMemStoreDataSize();
assertEquals(regionMemstoreSize, newRegionMemstoreSize);
// not dropped
assertNotNull(secondaryRegion.getPrepareFlushResult());
LOG.info("-- Verifying edits from secondary");
verifyData(secondaryRegion, 0, numRows, cq, families);
LOG.info("-- Verifying edits from primary.");
verifyData(primaryRegion, 0, numRows, cq, families);
}