use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class HRegion method doMiniBatchMutate.
/**
* Processes one mini-batch of the operations handed to
* {@link #batchMutate(Mutation[], long, long)}. The same path is used to replay edits
* (<code>batchOp.isInReplay()</code>).
* <p>
* Starting at <code>batchOp.nextIndexToProcess</code> the method:
* <ol>
* <li>acquires row locks for as many consecutive operations as it can,</li>
* <li>stamps cell timestamps (skipped in replay) and runs the pre-batch coprocessor hook,</li>
* <li>builds the WAL edit(s),</li>
* <li>appends to the WAL and syncs,</li>
* <li>applies the family maps to the memstore,</li>
* <li>completes the MVCC transaction,</li>
* <li>releases locks, marks operations SUCCESS and runs the post hooks.</li>
* </ol>
* The method returns nothing; results are reported through <code>batchOp.retCodeDetails</code>,
* and the accumulated memstore size delta is handed to <code>addAndGetMemstoreSize</code> in the
* finally block. <code>batchOp.nextIndexToProcess</code> is always advanced to the end of the
* range examined, whether or not the mini-batch succeeded.
* @param batchOp the batch being worked through; its status array and next-index cursor are
* updated in place
* @throws IOException propagated from lock acquisition timeouts, WAL append/sync, or any of the
* helpers and coprocessor hooks called here
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "UL_UNRELEASED_LOCK", justification = "Findbugs seems to be confused on this.")
@SuppressWarnings("unchecked")
private // TODO: This needs a rewrite. Doesn't have to be this long. St.Ack 20160120
void doMiniBatchMutate(BatchOperation<?> batchOp) throws IOException {
boolean replay = batchOp.isInReplay();
// Nonce pair of the WALEdit currently being accumulated in STEP 3.
long currentNonceGroup = HConstants.NO_NONCE;
long currentNonce = HConstants.NO_NONCE;
WALEdit walEdit = null;
// True while this thread holds updatesLock.readLock(); consulted by the finally block.
boolean locked = false;
// reference family maps directly so coprocessors can mutate them if desired
Map<byte[], List<Cell>>[] familyMaps = new Map[batchOp.operations.length];
// We try to set up a batch in the range [firstIndex,lastIndexExclusive)
int firstIndex = batchOp.nextIndexToProcess;
int lastIndexExclusive = firstIndex;
boolean success = false;
int noOfPuts = 0;
int noOfDeletes = 0;
// Non-null only while an MVCC transaction is open and still needs completing.
WriteEntry writeEntry = null;
// Running cell total, used only as the size hint passed to the WALEdit constructor.
int cellCount = 0;
/** Keep track of the locks we hold so we can release them in finally clause */
List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
MemstoreSize memstoreSize = new MemstoreSize();
final ObservedExceptionsInBatch observedExceptions = new ObservedExceptionsInBatch();
try {
// STEP 1. Try to acquire as many locks as we can, and ensure we acquire at least one.
int numReadyToWrite = 0;
long now = EnvironmentEdgeManager.currentTime();
while (lastIndexExclusive < batchOp.operations.length) {
// A true return from checkBatchOp means this operation is stepped over without locking;
// presumably it has already been given a non-NOT_RUN status -- confirm in checkBatchOp.
if (checkBatchOp(batchOp, lastIndexExclusive, familyMaps, now, observedExceptions)) {
lastIndexExclusive++;
continue;
}
Mutation mutation = batchOp.getMutation(lastIndexExclusive);
// If we haven't got any rows in our batch, we should block to get the next one.
RowLock rowLock = null;
try {
rowLock = getRowLockInternal(mutation.getRow(), true);
} catch (TimeoutIOException e) {
// We will retry when other exceptions, but we should stop if we timeout .
throw e;
} catch (IOException ioe) {
// Logged and swallowed: rowLock stays null, which ends lock acquisition just below.
LOG.warn("Failed getting lock, row=" + Bytes.toStringBinary(mutation.getRow()), ioe);
}
if (rowLock == null) {
// Stop acquiring more rows for this batch
break;
} else {
acquiredRowLocks.add(rowLock);
}
lastIndexExclusive++;
numReadyToWrite++;
// In replay the timestamp loop below is skipped, so cells are counted here instead.
if (replay) {
for (List<Cell> cells : mutation.getFamilyCellMap().values()) {
cellCount += cells.size();
}
}
}
// We've now grabbed as many mutations off the list as we can
// STEP 2. Update any LATEST_TIMESTAMP timestamps
// We should record the timestamp only after we have acquired the rowLock,
// otherwise, newer puts/deletes are not guaranteed to have a newer timestamp
now = EnvironmentEdgeManager.currentTime();
byte[] byteNow = Bytes.toBytes(now);
// Nothing to put/delete -- an exception in the above such as NoSuchColumnFamily?
// Returning here leaves success == false; the finally block still runs.
if (numReadyToWrite <= 0) {
return;
}
// The loop condition makes this whole pass a no-op in replay mode.
for (int i = firstIndex; !replay && i < lastIndexExclusive; i++) {
// skip invalid
if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) {
// lastIndexExclusive was incremented above.
continue;
}
Mutation mutation = batchOp.getMutation(i);
// Anything that is not a Put is handled as a delete here.
if (mutation instanceof Put) {
updateCellTimestamps(familyMaps[i].values(), byteNow);
noOfPuts++;
} else {
prepareDeleteTimestamps(mutation, familyMaps[i], byteNow);
noOfDeletes++;
}
rewriteCellTags(familyMaps[i], mutation);
WALEdit fromCP = batchOp.walEditsFromCoprocessors[i];
if (fromCP != null) {
cellCount += fromCP.size();
}
// Cells of SKIP_WAL mutations never reach the WALEdit, so they are left out of the count.
if (getEffectiveDurability(mutation.getDurability()) != Durability.SKIP_WAL) {
for (List<Cell> cells : familyMaps[i].values()) {
cellCount += cells.size();
}
}
}
lock(this.updatesLock.readLock(), numReadyToWrite);
locked = true;
// calling the pre CP hook for batch mutation
if (!replay && coprocessorHost != null) {
MiniBatchOperationInProgress<Mutation> miniBatchOp = new MiniBatchOperationInProgress<>(batchOp.getMutationsForCoprocs(), batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
// A true return from the hook ends the mini-batch right here. success is still false,
// so the finally block marks every operation still NOT_RUN in the range as FAILURE.
if (coprocessorHost.preBatchMutate(miniBatchOp)) {
return;
} else {
for (int i = firstIndex; i < lastIndexExclusive; i++) {
if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) {
// lastIndexExclusive was incremented above.
continue;
}
// we pass (i - firstIndex) below since the call expects a relative index
Mutation[] cpMutations = miniBatchOp.getOperationsFromCoprocessors(i - firstIndex);
if (cpMutations == null) {
continue;
}
Mutation mutation = batchOp.getMutation(i);
boolean skipWal = getEffectiveDurability(mutation.getDurability()) == Durability.SKIP_WAL;
// Else Coprocessor added more Mutations corresponding to the Mutation at this index.
for (int j = 0; j < cpMutations.length; j++) {
Mutation cpMutation = cpMutations[j];
Map<byte[], List<Cell>> cpFamilyMap = cpMutation.getFamilyCellMap();
checkAndPrepareMutation(cpMutation, replay, cpFamilyMap, now);
// Acquire row locks. If not, the whole batch will fail.
acquiredRowLocks.add(getRowLockInternal(cpMutation.getRow(), true));
// Returned mutations from coprocessor correspond to the Mutation at index i. We can
// directly add the cells from those mutations to the familyMaps of this mutation.
// The merged cells will get added to the memstore later (STEP 5).
mergeFamilyMaps(familyMaps[i], cpFamilyMap);
// Count the cells of the returned mutation, unless the original mutation skips the WAL.
if (!skipWal) {
for (List<Cell> cells : cpFamilyMap.values()) {
cellCount += cells.size();
}
}
}
}
}
}
// STEP 3. Build WAL edit
walEdit = new WALEdit(cellCount, replay);
// Tracks the highest-ordinal effective durability seen across the batch; used for sync().
Durability durability = Durability.USE_DEFAULT;
for (int i = firstIndex; i < lastIndexExclusive; i++) {
// Skip puts that were determined to be invalid during preprocessing
if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) {
continue;
}
Mutation m = batchOp.getMutation(i);
Durability tmpDur = getEffectiveDurability(m.getDurability());
if (tmpDur.ordinal() > durability.ordinal()) {
durability = tmpDur;
}
// we use durability of the original mutation for the mutation passed by CP.
if (tmpDur == Durability.SKIP_WAL) {
recordMutationWithoutWal(m.getFamilyCellMap());
continue;
}
long nonceGroup = batchOp.getNonceGroup(i);
long nonce = batchOp.getNonce(i);
// Operations sharing a nonce pair are accumulated into one WALEdit. Runs of equal nonces
// don't have to be contiguous: it will still work, just write more WALEdits than needed.
if (nonceGroup != currentNonceGroup || nonce != currentNonce) {
// Write what we have so far for nonces out to WAL
// (appendCurrentNonces is defined elsewhere; presumably a no-op for an empty edit -- confirm)
appendCurrentNonces(m, replay, walEdit, now, currentNonceGroup, currentNonce);
walEdit = new WALEdit(cellCount, replay);
currentNonceGroup = nonceGroup;
currentNonce = nonce;
}
// Add WAL edits by CP
WALEdit fromCP = batchOp.walEditsFromCoprocessors[i];
if (fromCP != null) {
for (Cell cell : fromCP.getCells()) {
walEdit.add(cell);
}
}
addFamilyMapToWALEdit(familyMaps[i], walEdit);
}
// STEP 4. Append the final edit to WAL and sync.
// Cluster ids for the WALKey are taken from the first mutation in the range.
Mutation mutation = batchOp.getMutation(firstIndex);
WALKey walKey = null;
long txid;
if (replay) {
// use wal key from the original
walKey = new WALKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc);
walKey.setOrigLogSeqNum(batchOp.getReplaySequenceId());
if (!walEdit.isEmpty()) {
txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
if (txid != 0) {
sync(txid, durability);
}
}
} else {
try {
if (!walEdit.isEmpty()) {
// we use HLogKey here instead of WALKey directly to support legacy coprocessors.
// NOTE(review): the line below constructs a WALKey, so the comment above looks stale.
walKey = new WALKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc, this.getReplicationScope());
// TODO: Use the doAppend methods below... complicated by the replay stuff above.
txid = this.wal.append(this.getRegionInfo(), walKey, walEdit, true);
if (txid != 0) {
sync(txid, durability);
}
// Nothing earlier in this method assigns writeEntry, so it is still null at this point.
if (writeEntry == null) {
// if MVCC not preassigned, wait here until assigned
writeEntry = walKey.getWriteEntry();
}
}
} catch (IOException ioe) {
if (walKey != null && writeEntry == null) {
// the writeEntry is not preassigned and error occurred during append or sync
mvcc.complete(walKey.getWriteEntry());
}
throw ioe;
}
}
if (walKey == null) {
// If no walKey, then not in replay and skipping WAL or some such. Begin an MVCC transaction
// to get sequence id.
writeEntry = mvcc.begin();
}
// STEP 5. Write back to memstore
for (int i = firstIndex; i < lastIndexExclusive; i++) {
if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) {
continue;
}
// We need to update the sequence id for following reasons.
// 1) If the op is in replay mode, FSWALEntry#stampRegionSequenceId won't stamp sequence id.
// 2) If no WAL, FSWALEntry won't be used
// we use durability of the original mutation for the mutation passed by CP.
// NOTE(review): this tests the mutation's raw durability, whereas STEPs 2 and 3 test
// getEffectiveDurability(...). Confirm whether a mutation that only resolves to SKIP_WAL
// through getEffectiveDurability should also have its sequence id stamped here.
boolean updateSeqId = replay || batchOp.getMutation(i).getDurability() == Durability.SKIP_WAL;
if (updateSeqId) {
this.updateSequenceId(familyMaps[i].values(), replay ? batchOp.getReplaySequenceId() : writeEntry.getWriteNumber());
}
applyFamilyMapToMemstore(familyMaps[i], memstoreSize);
}
// calling the post CP hook for batch mutation
if (!replay && coprocessorHost != null) {
MiniBatchOperationInProgress<Mutation> miniBatchOp = new MiniBatchOperationInProgress<>(batchOp.getMutationsForCoprocs(), batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
coprocessorHost.postBatchMutate(miniBatchOp);
}
// STEP 6. Complete mvcc.
if (replay) {
this.mvcc.advanceTo(batchOp.getReplaySequenceId());
} else {
// writeEntry won't be empty if not in replay mode
mvcc.completeAndWait(writeEntry);
// Cleared so the finally block does not complete the same entry a second time.
writeEntry = null;
}
// STEP 7. Release row locks, etc.
if (locked) {
this.updatesLock.readLock().unlock();
locked = false;
}
// Also called again in the finally block; presumably releaseRowLocks empties the list so
// the second call is a no-op -- confirm.
releaseRowLocks(acquiredRowLocks);
// NOTE(review): identity comparison against the shared OperationStatus.NOT_RUN instance,
// unlike the status-code comparisons used elsewhere in this method.
for (int i = firstIndex; i < lastIndexExclusive; i++) {
if (batchOp.retCodeDetails[i] == OperationStatus.NOT_RUN) {
batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
}
}
// The per-mutation post hooks run only now, after the edits have been appended/synced and
// applied above, so that the coprocessor contract is adhered to.
if (!replay && coprocessorHost != null) {
for (int i = firstIndex; i < lastIndexExclusive; i++) {
// only for successful puts
if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.SUCCESS) {
continue;
}
Mutation m = batchOp.getMutation(i);
// walEdit here is the last WALEdit built in STEP 3.
if (m instanceof Put) {
coprocessorHost.postPut((Put) m, walEdit, m.getDurability());
} else {
coprocessorHost.postDelete((Delete) m, walEdit, m.getDurability());
}
}
}
success = true;
} finally {
// Call complete rather than completeAndWait because we probably had error if walKey != null
if (writeEntry != null)
mvcc.complete(writeEntry);
this.addAndGetMemstoreSize(memstoreSize);
if (locked) {
this.updatesLock.readLock().unlock();
}
releaseRowLocks(acquiredRowLocks);
if (noOfPuts > 0) {
// There were some Puts in the batch.
if (this.metricsRegion != null) {
this.metricsRegion.updatePut();
}
}
if (noOfDeletes > 0) {
// There were some Deletes in the batch.
if (this.metricsRegion != null) {
this.metricsRegion.updateDelete();
}
}
// On any early return or exception, whatever is still NOT_RUN in the range becomes FAILURE.
if (!success) {
for (int i = firstIndex; i < lastIndexExclusive; i++) {
if (batchOp.retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.NOT_RUN) {
batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
}
}
}
if (coprocessorHost != null && !batchOp.isInReplay()) {
// call the coprocessor hook to do any finalization steps
// after the put is done
MiniBatchOperationInProgress<Mutation> miniBatchOp = new MiniBatchOperationInProgress<>(batchOp.getMutationsForCoprocs(), batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
coprocessorHost.postBatchMutateIndispensably(miniBatchOp, success);
}
// Advance the cursor so the caller's next mini-batch starts after the range handled here.
batchOp.nextIndexToProcess = lastIndexExclusive;
}
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class HRegion method doWALAppend.
/**
 * Appends <code>walEdit</code> to the WAL under a freshly built {@link WALKey}, syncs it when the
 * append yields a non-zero transaction id, and hands back the MVCC write entry of that key.
 * If the append, the sync or the write-entry lookup throws an {@link IOException}, the key's
 * write entry is completed against <code>mvcc</code> before the exception is rethrown.
 * @param walEdit the edit to append
 * @param durability durability passed through to <code>sync</code>
 * @param clusterIds cluster ids recorded in the WAL key
 * @param now write time recorded in the WAL key
 * @param nonceGroup nonce group recorded in the WAL key
 * @param nonce nonce recorded in the WAL key
 * @return writeEntry associated with this append
 * @throws IOException if appending or syncing fails
 */
private WriteEntry doWALAppend(WALEdit walEdit, Durability durability, List<UUID> clusterIds, long now, long nonceGroup, long nonce) throws IOException {
  // The key is built with NO_SEQUENCE_ID and is given the region's mvcc and replication scope.
  final WALKey appendKey = new WALKey(this.getRegionInfo().getEncodedNameAsBytes(), this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, clusterIds, nonceGroup, nonce, mvcc, this.getReplicationScope());
  try {
    final long transactionId = this.wal.append(this.getRegionInfo(), appendKey, walEdit, true);
    // A transaction id of zero means there is nothing to sync.
    if (transactionId != 0) {
      sync(transactionId, durability);
    }
    return appendKey.getWriteEntry();
  } catch (IOException failure) {
    // Close out the MVCC entry tied to this key so it is not left open after a failed append.
    mvcc.complete(appendKey.getWriteEntry());
    throw failure;
  }
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class ReplicationProtbufUtil method buildReplicateWALEntryRequest.
/**
 * Builds a ReplicateWALEntryRequest out of an array of WAL entries, together with a scanner
 * over the cells those entries carry.
 * @param entries the WAL entries to be replicated
 * @param encodedRegionName region name to put on every key; when null each entry's own
 *          encoded region name is used
 * @param replicationClusterId Id which will uniquely identify source cluster FS client
 *          configurations in the replication configuration directory; not set when null
 * @param sourceBaseNamespaceDir Path to source cluster base namespace directory; not set when null
 * @param sourceHFileArchiveDir Path to the source cluster hfile archive directory; not set when
 *          null
 * @return a pair of ReplicateWALEntryRequest and a CellScanner over all the WALEdit values found.
 */
public static Pair<AdminProtos.ReplicateWALEntryRequest, CellScanner> buildReplicateWALEntryRequest(final Entry[] entries, byte[] encodedRegionName, String replicationClusterId, Path sourceBaseNamespaceDir, Path sourceHFileArchiveDir) {
  // One cell list per entry, in entry order; the cells are not copied into the protobuf.
  List<List<? extends Cell>> cellsPerEntry = new ArrayList<>(entries.length);
  int estimatedBytes = 0;
  // The builders are shared across iterations; each is either cleared or fully overwritten.
  WALProtos.FamilyScope.Builder familyScopeBuilder = WALProtos.FamilyScope.newBuilder();
  AdminProtos.WALEntry.Builder walEntryBuilder = AdminProtos.WALEntry.newBuilder();
  AdminProtos.ReplicateWALEntryRequest.Builder requestBuilder = AdminProtos.ReplicateWALEntryRequest.newBuilder();
  HBaseProtos.UUID.Builder clusterIdBuilder = HBaseProtos.UUID.newBuilder();

  for (Entry walEntry : entries) {
    walEntryBuilder.clear();
    // TODO: this duplicates a lot in WALKey#getBuilder
    WALProtos.WALKey.Builder protoKey = walEntryBuilder.getKeyBuilder();
    WALKey walKey = walEntry.getKey();

    byte[] regionName = (encodedRegionName != null) ? encodedRegionName : walKey.getEncodedRegionName();
    protoKey.setEncodedRegionName(UnsafeByteOperations.unsafeWrap(regionName));
    protoKey.setTableName(UnsafeByteOperations.unsafeWrap(walKey.getTablename().getName()));
    protoKey.setLogSequenceNumber(walKey.getLogSeqNum());
    protoKey.setWriteTime(walKey.getWriteTime());

    // Nonce fields are optional on the wire: only written when actually present.
    long nonce = walKey.getNonce();
    if (nonce != HConstants.NO_NONCE) {
      protoKey.setNonce(nonce);
    }
    long nonceGroup = walKey.getNonceGroup();
    if (nonceGroup != HConstants.NO_NONCE) {
      protoKey.setNonceGroup(nonceGroup);
    }

    for (UUID clusterId : walKey.getClusterIds()) {
      clusterIdBuilder.setLeastSigBits(clusterId.getLeastSignificantBits());
      clusterIdBuilder.setMostSigBits(clusterId.getMostSignificantBits());
      protoKey.addClusterIds(clusterIdBuilder.build());
    }

    long origSeqNum = walKey.getOrigLogSeqNum();
    if (origSeqNum > 0) {
      protoKey.setOrigSequenceNumber(origSeqNum);
    }

    // Copy per-family replication scopes, if the key carries any.
    NavigableMap<byte[], Integer> familyScopes = walKey.getReplicationScopes();
    if (familyScopes != null) {
      for (Map.Entry<byte[], Integer> familyScope : familyScopes.entrySet()) {
        familyScopeBuilder.setFamily(UnsafeByteOperations.unsafeWrap(familyScope.getKey()));
        familyScopeBuilder.setScopeType(WALProtos.ScopeType.valueOf(familyScope.getValue().intValue()));
        protoKey.addScopes(familyScopeBuilder.build());
      }
    }

    List<Cell> entryCells = walEntry.getEdit().getCells();
    // Add up the size. It is used later serializing out the kvs.
    for (Cell entryCell : entryCells) {
      estimatedBytes += CellUtil.estimatedSerializedSizeOf(entryCell);
    }
    cellsPerEntry.add(entryCells);
    // Record how many cells belong to this entry.
    walEntryBuilder.setAssociatedCellCount(entryCells.size());
    requestBuilder.addEntry(walEntryBuilder.build());
  }

  if (replicationClusterId != null) {
    requestBuilder.setReplicationClusterId(replicationClusterId);
  }
  if (sourceBaseNamespaceDir != null) {
    requestBuilder.setSourceBaseNamespaceDirPath(sourceBaseNamespaceDir.toString());
  }
  if (sourceHFileArchiveDir != null) {
    requestBuilder.setSourceHFileArchiveDirPath(sourceHFileArchiveDir.toString());
  }

  return new Pair<>(requestBuilder.build(), getCellScanner(cellsPerEntry, estimatedBytes));
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class FaultyProtobufLogReader method next.
/**
 * Returns the next buffered entry, throwing a fake IOException at the point selected by
 * {@code getFailureType()}. When the buffer is empty, entries are first read into it with
 * {@code readNext} until that reports no more data; the entry from the final, unsuccessful
 * read is buffered too. The {@code reuse} argument is not used.
 * @return the next entry, or null when the polled entry's edit is empty
 * @throws IOException the injected "fake Exception", or whatever {@code readNext} throws
 */
@Override
public Entry next(Entry reuse) throws IOException {
  if (nextQueue.isEmpty()) {
    // Read the whole thing at once and fake reading
    boolean keepReading = true;
    while (keepReading) {
      Entry buffered = new Entry(new WALKey(), new WALEdit());
      if (compressionContext != null) {
        buffered.setCompressionContext(compressionContext);
      }
      keepReading = readNext(buffered);
      nextQueue.offer(buffered);
      numberOfFileEntries++;
    }
  }

  // Inject the failure when the remaining queue length matches the configured position:
  // full queue (beginning), half of it (middle), or a single entry left (end).
  boolean injectFailure =
      (nextQueue.size() == this.numberOfFileEntries && getFailureType() == FailureType.BEGINNING)
      || (nextQueue.size() == this.numberOfFileEntries / 2 && getFailureType() == FailureType.MIDDLE)
      || (nextQueue.size() == 1 && getFailureType() == FailureType.END);
  if (injectFailure) {
    throw new IOException("fake Exception");
  }

  Entry head = nextQueue.poll();
  if (head != null) {
    edit++;
  }
  // An entry with an empty edit is reported to the caller as null.
  return head.getEdit().isEmpty() ? null : head;
}
use of org.apache.hadoop.hbase.wal.WALKey in project hbase by apache.
the class AbstractTestFSWAL method addEdits.
/**
 * Appends {@code times} single-cell edits for the fixed row "row" to {@code log}, then syncs
 * the log once. Each edit is stamped with the wall-clock time taken at the start of its
 * iteration, and the same byte array serves as row, family, qualifier and value.
 */
protected void addEdits(WAL log, HRegionInfo hri, HTableDescriptor htd, int times, MultiVersionConcurrencyControl mvcc, NavigableMap<byte[], Integer> scopes) throws IOException {
  final byte[] rowBytes = Bytes.toBytes("row");
  int remaining = times;
  while (remaining-- > 0) {
    final long writeTime = System.currentTimeMillis();
    WALEdit singleCellEdit = new WALEdit();
    singleCellEdit.add(new KeyValue(rowBytes, rowBytes, rowBytes, writeTime, rowBytes));
    WALKey walKey = new WALKey(hri.getEncodedNameAsBytes(), htd.getTableName(), WALKey.NO_SEQUENCE_ID, writeTime, WALKey.EMPTY_UUIDS, HConstants.NO_NONCE, HConstants.NO_NONCE, mvcc, scopes);
    log.append(hri, walKey, singleCellEdit, true);
  }
  // A single sync after all the appends.
  log.sync();
}
Aggregations