use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.
the class HRegion method internalPrepareFlushCache.
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DLS_DEAD_LOCAL_STORE", justification = "FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(final WAL wal, final long myseqid, final Collection<Store> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker) throws IOException {
if (this.rsServices != null && this.rsServices.isAborted()) {
// Don't flush when server aborting, it's unsafe
throw new IOException("Aborting flush because server is aborted...");
}
final long startTime = EnvironmentEdgeManager.currentTime();
// to go get one.
if (this.memstoreDataSize.get() <= 0) {
// Take an update lock so no edits can come into memory just yet.
this.updatesLock.writeLock().lock();
WriteEntry writeEntry = null;
try {
if (this.memstoreDataSize.get() <= 0) {
// Presume that if there are still no edits in the memstore, then there are no edits for
// this region out in the WAL subsystem so no need to do any trickery clearing out
// edits in the WAL sub-system. Up the sequence number so the resulting flush id is for
// sure just beyond the last appended region edit and not associated with any edit
// (useful as marker when bulk loading, etc.).
FlushResult flushResult = null;
if (wal != null) {
writeEntry = mvcc.begin();
long flushOpSeqId = writeEntry.getWriteNumber();
flushResult = new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
mvcc.completeAndWait(writeEntry);
// Set to null so we don't complete it again down in finally block.
writeEntry = null;
return new PrepareFlushResult(flushResult, myseqid);
} else {
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
}
}
} finally {
if (writeEntry != null) {
// If writeEntry is non-null, this operation failed; the mvcc transaction failed...
// but complete it anyways so it doesn't block the mvcc queue.
mvcc.complete(writeEntry);
}
this.updatesLock.writeLock().unlock();
}
}
logFatLineOnFlush(storesToFlush, myseqid);
// Stop updates while we snapshot the memstore of all of these regions' stores. We only have
// to do this for a moment. It is quick. We also set the memstore size to zero here before we
// allow updates again so its value will represent the size of the updates received
// during flush
// We have to take an update lock during snapshot, or else a write could end up in both snapshot
// and memstore (makes it difficult to do atomic rows then)
status.setStatus("Obtaining lock to block concurrent updates");
// block waiting for the lock for internal flush
this.updatesLock.writeLock().lock();
status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
MemstoreSize totalSizeOfFlushableStores = new MemstoreSize();
Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
for (Store store : storesToFlush) {
flushedFamilyNamesToSeq.put(store.getFamily().getName(), ((HStore) store).preFlushSeqIDEstimation());
}
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], MemstoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
// The sequence id of this flush operation which is used to log FlushMarker and pass to
// createFlushContext to use as the store file's sequence id. It can be in advance of edits
// still in the memstore, edits that are in other column families yet to be flushed.
long flushOpSeqId = HConstants.NO_SEQNUM;
// The max flushed sequence id after this flush operation completes. All edits in memstore
// will be in advance of this sequence id.
long flushedSeqId = HConstants.NO_SEQNUM;
byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
try {
if (wal != null) {
Long earliestUnflushedSequenceIdForTheRegion = wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
if (earliestUnflushedSequenceIdForTheRegion == null) {
// This should never happen. This is how startCacheFlush signals flush cannot proceed.
String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
status.setStatus(msg);
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false), myseqid);
}
flushOpSeqId = getNextSequenceId(wal);
// Back up 1, minus 1 from oldest sequence id in memstore to get last 'flushed' edit
flushedSeqId = earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM ? flushOpSeqId : earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
} else {
// use the provided sequence Id as WAL is not being used for this flush.
flushedSeqId = flushOpSeqId = myseqid;
}
for (Store s : storesToFlush) {
MemstoreSize flushableSize = s.getSizeToFlush();
totalSizeOfFlushableStores.incMemstoreSize(flushableSize);
storeFlushCtxs.put(s.getFamily().getName(), s.createFlushContext(flushOpSeqId));
// for writing stores to WAL
committedFiles.put(s.getFamily().getName(), null);
storeFlushableSize.put(s.getFamily().getName(), flushableSize);
}
// write the snapshot start to WAL
if (wal != null && !writestate.readOnly) {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
// No sync. Sync is below where no updates lock and we do FlushAction.COMMIT_FLUSH
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false, mvcc);
}
// Prepare flush (take a snapshot)
for (StoreFlushContext flush : storeFlushCtxs.values()) {
flush.prepare();
}
} catch (IOException ex) {
doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
throw ex;
} finally {
this.updatesLock.writeLock().unlock();
}
String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " + "flushsize=" + totalSizeOfFlushableStores;
status.setStatus(s);
doSyncOfUnflushedWALChanges(wal, getRegionInfo());
return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.
the class HRegion method doDelta.
/**
* Add "deltas" to Cells. Deltas are increments or appends. Switch on <code>op</code>.
*
* <p>If increment, add deltas to current values or if an append, then
* append the deltas to the current Cell values.
*
* <p>Append and Increment code paths are mostly the same. They differ in just a few places.
* This method does the code path for increment and append and then in key spots, switches
* on the passed in <code>op</code> to do increment or append specific paths.
*/
private Result doDelta(Operation op, Mutation mutation, long nonceGroup, long nonce, boolean returnResults) throws IOException {
checkReadOnly();
checkResources();
checkRow(mutation.getRow(), op.toString());
checkFamilies(mutation.getFamilyCellMap().keySet());
this.writeRequestsCount.increment();
WriteEntry writeEntry = null;
startRegionOperation(op);
List<Cell> results = returnResults ? new ArrayList<>(mutation.size()) : null;
RowLock rowLock = null;
MemstoreSize memstoreSize = new MemstoreSize();
try {
rowLock = getRowLockInternal(mutation.getRow(), false);
lock(this.updatesLock.readLock());
try {
Result cpResult = doCoprocessorPreCall(op, mutation);
if (cpResult != null) {
return returnResults ? cpResult : null;
}
Durability effectiveDurability = getEffectiveDurability(mutation.getDurability());
Map<Store, List<Cell>> forMemStore = new HashMap<>(mutation.getFamilyCellMap().size());
// Reckon Cells to apply to WAL -- in returned walEdit -- and what to add to memstore and
// what to return back to the client (in 'forMemStore' and 'results' respectively).
WALEdit walEdit = reckonDeltas(op, mutation, effectiveDurability, forMemStore, results);
// Actually write to WAL now if a walEdit to apply.
if (walEdit != null && !walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, effectiveDurability, nonceGroup, nonce);
} else {
// If walEdits is empty, it means we skipped the WAL; update LongAdders and start an mvcc
// transaction.
recordMutationWithoutWal(mutation.getFamilyCellMap());
writeEntry = mvcc.begin();
updateSequenceId(forMemStore.values(), writeEntry.getWriteNumber());
}
// Now write to MemStore. Do it a column family at a time.
for (Map.Entry<Store, List<Cell>> e : forMemStore.entrySet()) {
applyToMemstore(e.getKey(), e.getValue(), true, memstoreSize);
}
mvcc.completeAndWait(writeEntry);
if (rsServices != null && rsServices.getNonceManager() != null) {
rsServices.getNonceManager().addMvccToOperationContext(nonceGroup, nonce, writeEntry.getWriteNumber());
}
writeEntry = null;
} finally {
this.updatesLock.readLock().unlock();
}
// If results is null, then client asked that we not return the calculated results.
return results != null && returnResults ? Result.create(results) : Result.EMPTY_RESULT;
} finally {
// a 0 increment.
if (writeEntry != null)
mvcc.complete(writeEntry);
if (rowLock != null) {
rowLock.release();
}
// Request a cache flush if over the limit. Do it outside update lock.
if (isFlushSize(addAndGetMemstoreSize(memstoreSize))) {
requestFlush();
}
closeRegionOperation(op);
if (this.metricsRegion != null) {
switch(op) {
case INCREMENT:
this.metricsRegion.updateIncrement();
break;
case APPEND:
this.metricsRegion.updateAppend();
break;
default:
break;
}
}
}
}
use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.
the class WALUtil method doFullMarkerAppendTransaction.
/**
* A 'full' WAL transaction involves starting an mvcc transaction followed by an append, an
* optional sync, and then a call to complete the mvcc transaction. This method does it all. Good
* for case of adding a single edit or marker to the WAL.
* <p/>
* This write is for internal use only. Not for external client consumption.
* @return WALKeyImpl that was added to the WAL.
*/
private static WALKeyImpl doFullMarkerAppendTransaction(final WAL wal, final NavigableMap<byte[], Integer> replicationScope, final RegionInfo hri, final WALEdit edit, final MultiVersionConcurrencyControl mvcc, final Map<String, byte[]> extendedAttributes, final boolean sync, final RegionReplicationSink sink) throws IOException {
// TODO: Pass in current time to use?
WALKeyImpl walKey = new WALKeyImpl(hri.getEncodedNameAsBytes(), hri.getTable(), EnvironmentEdgeManager.currentTime(), mvcc, replicationScope, extendedAttributes);
long trx = MultiVersionConcurrencyControl.NONE;
try {
trx = wal.appendMarker(hri, walKey, edit);
WriteEntry writeEntry = walKey.getWriteEntry();
if (sink != null) {
writeEntry.attachCompletionAction(() -> sink.add(walKey, edit, RpcServer.getCurrentServerCallWithCellScanner().orElse(null)));
}
if (sync) {
wal.sync(trx);
}
// Call complete only here because these are markers only. They are not for clients to read.
mvcc.complete(writeEntry);
} catch (IOException ioe) {
if (walKey.getWriteEntry() != null) {
mvcc.complete(walKey.getWriteEntry());
}
throw ioe;
}
return walKey;
}
use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.
the class HRegion method processRowsWithLocks.
@Override
public void processRowsWithLocks(RowProcessor<?, ?> processor, long timeout, long nonceGroup, long nonce) throws IOException {
for (byte[] row : processor.getRowsToLock()) {
checkRow(row, "processRowsWithLocks");
}
if (!processor.readOnly()) {
checkReadOnly();
}
checkResources();
startRegionOperation();
WALEdit walEdit = new WALEdit();
// STEP 1. Run pre-process hook
preProcess(processor, walEdit);
// Short circuit the read only case
if (processor.readOnly()) {
try {
long now = EnvironmentEdgeManager.currentTime();
doProcessRowWithTimeout(processor, now, this, null, null, timeout);
processor.postProcess(this, walEdit, true);
} finally {
closeRegionOperation();
}
return;
}
boolean locked = false;
List<RowLock> acquiredRowLocks = null;
List<Mutation> mutations = new ArrayList<>();
Collection<byte[]> rowsToLock = processor.getRowsToLock();
// This is assigned by mvcc either explicity in the below or in the guts of the WAL append
// when it assigns the edit a sequencedid (A.K.A the mvcc write number).
WriteEntry writeEntry = null;
MemstoreSize memstoreSize = new MemstoreSize();
try {
boolean success = false;
try {
// STEP 2. Acquire the row lock(s)
acquiredRowLocks = new ArrayList<>(rowsToLock.size());
for (byte[] row : rowsToLock) {
// Attempt to lock all involved rows, throw if any lock times out
// use a writer lock for mixed reads and writes
acquiredRowLocks.add(getRowLockInternal(row, false));
}
// STEP 3. Region lock
lock(this.updatesLock.readLock(), acquiredRowLocks.isEmpty() ? 1 : acquiredRowLocks.size());
locked = true;
long now = EnvironmentEdgeManager.currentTime();
// STEP 4. Let the processor scan the rows, generate mutations and add waledits
doProcessRowWithTimeout(processor, now, this, mutations, walEdit, timeout);
if (!mutations.isEmpty()) {
// STEP 5. Call the preBatchMutate hook
processor.preBatchMutate(this, walEdit);
// STEP 6. Append and sync if walEdit has data to write out.
if (!walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, getEffectiveDurability(processor.useDurability()), processor.getClusterIds(), now, nonceGroup, nonce);
} else {
// We are here if WAL is being skipped.
writeEntry = this.mvcc.begin();
}
// STEP 7. Apply to memstore
long sequenceId = writeEntry.getWriteNumber();
for (Mutation m : mutations) {
// Handle any tag based cell features.
// TODO: Do we need to call rewriteCellTags down in applyToMemstore()? Why not before
// so tags go into WAL?
rewriteCellTags(m.getFamilyCellMap(), m);
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance(); ) {
Cell cell = cellScanner.current();
if (walEdit.isEmpty()) {
// If walEdit is empty, we put nothing in WAL. WAL stamps Cells with sequence id.
// If no WAL, need to stamp it here.
CellUtil.setSequenceId(cell, sequenceId);
}
applyToMemstore(getHStore(cell), cell, memstoreSize);
}
}
// STEP 8. call postBatchMutate hook
processor.postBatchMutate(this);
// STEP 9. Complete mvcc.
mvcc.completeAndWait(writeEntry);
writeEntry = null;
// STEP 10. Release region lock
if (locked) {
this.updatesLock.readLock().unlock();
locked = false;
}
// STEP 11. Release row lock(s)
releaseRowLocks(acquiredRowLocks);
}
success = true;
} finally {
// Call complete rather than completeAndWait because we probably had error if walKey != null
if (writeEntry != null)
mvcc.complete(writeEntry);
if (locked) {
this.updatesLock.readLock().unlock();
}
// release locks if some were acquired but another timed out
releaseRowLocks(acquiredRowLocks);
}
// 12. Run post-process hook
processor.postProcess(this, walEdit, success);
} finally {
closeRegionOperation();
if (!mutations.isEmpty()) {
long newSize = this.addAndGetMemstoreSize(memstoreSize);
requestFlushIfNeeded(newSize);
}
}
}
use of org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry in project hbase by apache.
the class HRegion method internalPrepareFlushCache.
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DLS_DEAD_LOCAL_STORE", justification = "FindBugs seems confused about trxId")
protected PrepareFlushResult internalPrepareFlushCache(WAL wal, long myseqid, Collection<HStore> storesToFlush, MonitoredTask status, boolean writeFlushWalMarker, FlushLifeCycleTracker tracker) throws IOException {
if (this.rsServices != null && this.rsServices.isAborted()) {
// Don't flush when server aborting, it's unsafe
throw new IOException("Aborting flush because server is aborted...");
}
final long startTime = EnvironmentEdgeManager.currentTime();
// to go get one.
if (this.memStoreSizing.getDataSize() <= 0) {
// Take an update lock so no edits can come into memory just yet.
this.updatesLock.writeLock().lock();
WriteEntry writeEntry = null;
try {
if (this.memStoreSizing.getDataSize() <= 0) {
// (useful as marker when bulk loading, etc.).
if (wal != null) {
writeEntry = mvcc.begin();
long flushOpSeqId = writeEntry.getWriteNumber();
FlushResultImpl flushResult = new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush", writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
mvcc.completeAndWait(writeEntry);
// Set to null so we don't complete it again down in finally block.
writeEntry = null;
return new PrepareFlushResult(flushResult, myseqid);
} else {
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush", false), myseqid);
}
}
} finally {
if (writeEntry != null) {
// If writeEntry is non-null, this operation failed; the mvcc transaction failed...
// but complete it anyways so it doesn't block the mvcc queue.
mvcc.complete(writeEntry);
}
this.updatesLock.writeLock().unlock();
}
}
logFatLineOnFlush(storesToFlush, myseqid);
// Stop updates while we snapshot the memstore of all of these regions' stores. We only have
// to do this for a moment. It is quick. We also set the memstore size to zero here before we
// allow updates again so its value will represent the size of the updates received
// during flush
// We have to take an update lock during snapshot, or else a write could end up in both snapshot
// and memstore (makes it difficult to do atomic rows then)
status.setStatus("Obtaining lock to block concurrent updates");
// block waiting for the lock for internal flush
this.updatesLock.writeLock().lock();
status.setStatus("Preparing flush snapshotting stores in " + getRegionInfo().getEncodedName());
MemStoreSizing totalSizeOfFlushableStores = new NonThreadSafeMemStoreSizing();
Map<byte[], Long> flushedFamilyNamesToSeq = new HashMap<>();
for (HStore store : storesToFlush) {
flushedFamilyNamesToSeq.put(store.getColumnFamilyDescriptor().getName(), store.preFlushSeqIDEstimation());
}
TreeMap<byte[], StoreFlushContext> storeFlushCtxs = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], List<Path>> committedFiles = new TreeMap<>(Bytes.BYTES_COMPARATOR);
TreeMap<byte[], MemStoreSize> storeFlushableSize = new TreeMap<>(Bytes.BYTES_COMPARATOR);
// The sequence id of this flush operation which is used to log FlushMarker and pass to
// createFlushContext to use as the store file's sequence id. It can be in advance of edits
// still in the memstore, edits that are in other column families yet to be flushed.
long flushOpSeqId = HConstants.NO_SEQNUM;
// The max flushed sequence id after this flush operation completes. All edits in memstore
// will be in advance of this sequence id.
long flushedSeqId = HConstants.NO_SEQNUM;
byte[] encodedRegionName = getRegionInfo().getEncodedNameAsBytes();
try {
if (wal != null) {
Long earliestUnflushedSequenceIdForTheRegion = wal.startCacheFlush(encodedRegionName, flushedFamilyNamesToSeq);
if (earliestUnflushedSequenceIdForTheRegion == null) {
// This should never happen. This is how startCacheFlush signals flush cannot proceed.
String msg = this.getRegionInfo().getEncodedName() + " flush aborted; WAL closing.";
status.setStatus(msg);
return new PrepareFlushResult(new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, msg, false), myseqid);
}
flushOpSeqId = getNextSequenceId(wal);
// Back up 1, minus 1 from oldest sequence id in memstore to get last 'flushed' edit
flushedSeqId = earliestUnflushedSequenceIdForTheRegion.longValue() == HConstants.NO_SEQNUM ? flushOpSeqId : earliestUnflushedSequenceIdForTheRegion.longValue() - 1;
} else {
// use the provided sequence Id as WAL is not being used for this flush.
flushedSeqId = flushOpSeqId = myseqid;
}
for (HStore s : storesToFlush) {
storeFlushCtxs.put(s.getColumnFamilyDescriptor().getName(), s.createFlushContext(flushOpSeqId, tracker));
// for writing stores to WAL
committedFiles.put(s.getColumnFamilyDescriptor().getName(), null);
}
// write the snapshot start to WAL
if (wal != null && !writestate.readOnly) {
FlushDescriptor desc = ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, getRegionInfo(), flushOpSeqId, committedFiles);
// No sync. Sync is below where no updates lock and we do FlushAction.COMMIT_FLUSH
WALUtil.writeFlushMarker(wal, this.getReplicationScope(), getRegionInfo(), desc, false, mvcc, regionReplicationSink.orElse(null));
}
// Prepare flush (take a snapshot)
storeFlushCtxs.forEach((name, flush) -> {
MemStoreSize snapshotSize = flush.prepare();
totalSizeOfFlushableStores.incMemStoreSize(snapshotSize);
storeFlushableSize.put(name, snapshotSize);
});
} catch (IOException ex) {
doAbortFlushToWAL(wal, flushOpSeqId, committedFiles);
throw ex;
} finally {
this.updatesLock.writeLock().unlock();
}
String s = "Finished memstore snapshotting " + this + ", syncing WAL and waiting on mvcc, " + "flushsize=" + totalSizeOfFlushableStores;
status.setStatus(s);
doSyncOfUnflushedWALChanges(wal, getRegionInfo());
return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId, flushedSeqId, totalSizeOfFlushableStores);
}
Aggregations