use of org.apache.hadoop.hbase.client.Durability in project hbase by apache.
the class HRegion method doDelta.
/**
* Add "deltas" to Cells. Deltas are increments or appends. Switch on <code>op</code>.
*
* <p>If increment, add deltas to current values or if an append, then
* append the deltas to the current Cell values.
*
* <p>Append and Increment code paths are mostly the same. They differ in just a few places.
* This method does the code path for increment and append and then in key spots, switches
* on the passed in <code>op</code> to do increment or append specific paths.
*
* <p>Locking order as written: region operation is started, then the row lock is taken, then
* the updates read lock. They are released in the reverse order by the two finally blocks.
*
* @param op the operation being applied; INCREMENT and APPEND each update their own metric
* @param mutation the mutation carrying the row and the per-family cells holding the deltas
* @param nonceGroup nonce group handed to the WAL append and to the nonce manager
* @param nonce nonce handed to the WAL append and to the nonce manager
* @param returnResults whether the caller wants the calculated cells back
* @return the calculated cells when <code>returnResults</code> is true, otherwise
* <code>Result.EMPTY_RESULT</code>; if the coprocessor pre-call supplied a result, that result
* is returned when <code>returnResults</code> is true and <code>null</code> otherwise
* @throws IOException if any of the checks, locking, WAL or memstore steps fail
*/
private Result doDelta(Operation op, Mutation mutation, long nonceGroup, long nonce, boolean returnResults) throws IOException {
// Checks that run before the region operation is started.
checkReadOnly();
checkResources();
checkRow(mutation.getRow(), op.toString());
checkFamilies(mutation.getFamilyCellMap().keySet());
this.writeRequestsCount.increment();
WriteEntry writeEntry = null;
startRegionOperation(op);
// Only allocate a results list when the caller asked for the calculated cells.
List<Cell> results = returnResults ? new ArrayList<>(mutation.size()) : null;
RowLock rowLock = null;
// Filled in by applyToMemstore below and read in the outer finally for the flush check.
MemstoreSize memstoreSize = new MemstoreSize();
try {
rowLock = getRowLockInternal(mutation.getRow(), false);
// Take the updates read lock; it is released by the inner finally.
lock(this.updatesLock.readLock());
try {
// A non-null result from the coprocessor pre-call short-circuits the whole operation.
// NOTE(review): this path returns null (not Result.EMPTY_RESULT) when returnResults is false.
Result cpResult = doCoprocessorPreCall(op, mutation);
if (cpResult != null) {
return returnResults ? cpResult : null;
}
Durability effectiveDurability = getEffectiveDurability(mutation.getDurability());
Map<Store, List<Cell>> forMemStore = new HashMap<>(mutation.getFamilyCellMap().size());
// Reckon Cells to apply to WAL -- in returned walEdit -- and what to add to memstore and
// what to return back to the client (in 'forMemStore' and 'results' respectively).
WALEdit walEdit = reckonDeltas(op, mutation, effectiveDurability, forMemStore, results);
// Actually write to WAL now if a walEdit to apply.
if (walEdit != null && !walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, effectiveDurability, nonceGroup, nonce);
} else {
// If walEdit is null or empty, it means we skipped the WAL; update LongAdders and start an
// mvcc transaction ourselves, then stamp the memstore-bound cells with its write number.
recordMutationWithoutWal(mutation.getFamilyCellMap());
writeEntry = mvcc.begin();
updateSequenceId(forMemStore.values(), writeEntry.getWriteNumber());
}
// Now write to MemStore. Do it a column family at a time.
for (Map.Entry<Store, List<Cell>> e : forMemStore.entrySet()) {
applyToMemstore(e.getKey(), e.getValue(), true, memstoreSize);
}
mvcc.completeAndWait(writeEntry);
// Record the write number against the nonce when a nonce manager is available.
if (rsServices != null && rsServices.getNonceManager() != null) {
rsServices.getNonceManager().addMvccToOperationContext(nonceGroup, nonce, writeEntry.getWriteNumber());
}
// Cleared so the outer finally does not complete the same entry a second time.
writeEntry = null;
} finally {
this.updatesLock.readLock().unlock();
}
// If results is null, then client asked that we not return the calculated results.
return results != null && returnResults ? Result.create(results) : Result.EMPTY_RESULT;
} finally {
// writeEntry is still non-null only if the inner block exited after the entry was created
// but before it was completed and cleared above; complete it here so it is not left open.
if (writeEntry != null)
mvcc.complete(writeEntry);
if (rowLock != null) {
rowLock.release();
}
// Request a cache flush if over the limit. Do it outside update lock.
if (isFlushSize(addAndGetMemstoreSize(memstoreSize))) {
requestFlush();
}
closeRegionOperation(op);
// Because this is in finally, the per-operation metric is updated on failure as well as success.
if (this.metricsRegion != null) {
switch(op) {
case INCREMENT:
this.metricsRegion.updateIncrement();
break;
case APPEND:
this.metricsRegion.updateAppend();
break;
default:
break;
}
}
}
}
use of org.apache.hadoop.hbase.client.Durability in project phoenix by apache.
the class Indexer method preBatchMutateWithExceptions.
/**
* Prepares index updates for a mini-batch before the batch is applied.
*
* <p>Steps, in order: (1) mark atomic operations IGNORE and, for every mutation the builder
* reports as enabled, take a row lock and track the highest-ordinal effective durability;
* (2) optionally overwrite cell timestamps and collect the mutations, merging them per row when
* a row occurs more than once; (3) ask the builder for the index updates; (4) hand updates whose
* target table name equals this region's table name back to the batch via
* <code>addOperationsFromCP</code>, and keep the rest in the batch context, adding them to the
* WAL edit of operation 0 unless the resulting durability is SKIP_WAL.
*
* @param c observer context giving access to the region environment
* @param miniBatchOp the batch of mutations in progress; operation statuses and the WAL edit of
* operation 0 may be modified
* @throws Throwable whatever the locking or index-building steps throw
*/
public void preBatchMutateWithExceptions(ObserverContext<RegionCoprocessorEnvironment> c, MiniBatchOperationInProgress<Mutation> miniBatchOp) throws Throwable {
// first group all the updates for a single row into a single update to be processed
Map<ImmutableBytesPtr, MultiMutation> mutationsMap = new HashMap<ImmutableBytesPtr, MultiMutation>();
// Table-level durability, with USE_DEFAULT resolved to SYNC_WAL; also SYNC_WAL if no region.
Durability defaultDurability = Durability.SYNC_WAL;
if (c.getEnvironment().getRegion() != null) {
defaultDurability = c.getEnvironment().getRegion().getTableDesc().getDurability();
defaultDurability = (defaultDurability == Durability.USE_DEFAULT) ? Durability.SYNC_WAL : defaultDurability;
}
/*
* Exclusively lock all rows so we get a consistent read
* while determining the index updates
*/
BatchMutateContext context = new BatchMutateContext();
setBatchMutateContext(c, context);
// Starts at SKIP_WAL and is raised to the highest-ordinal durability seen in the loop below.
Durability durability = Durability.SKIP_WAL;
boolean copyMutations = false;
for (int i = 0; i < miniBatchOp.size(); i++) {
Mutation m = miniBatchOp.getOperation(i);
// Atomic operations are marked IGNORE and take no further part in this method.
if (this.builder.isAtomicOp(m)) {
miniBatchOp.setOperationStatus(i, IGNORE);
continue;
}
if (this.builder.isEnabled(m)) {
// The acquired locks are kept on the batch context; they are not released in this method.
context.rowLocks.add(lockManager.lockRow(m.getRow(), rowLockWaitDuration));
Durability effectiveDurablity = (m.getDurability() == Durability.USE_DEFAULT) ? defaultDurability : m.getDurability();
if (effectiveDurablity.ordinal() > durability.ordinal()) {
durability = effectiveDurablity;
}
// Track whether the same row occurs more than once in the batch; if it does, the
// mutations are merged per row below (copyMutations). The null value is a placeholder.
ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
if (mutationsMap.containsKey(row)) {
copyMutations = true;
} else {
mutationsMap.put(row, null);
}
}
}
// early exit if it turns out we don't have any edits
if (mutationsMap.isEmpty()) {
return;
}
// If we're copying the mutations, the merged per-row MultiMutations held in mutationsMap are
// used; otherwise the original mutations are collected into a plain list.
Collection<Mutation> originalMutations;
Collection<? extends Mutation> mutations;
if (copyMutations) {
originalMutations = null;
mutations = mutationsMap.values();
} else {
originalMutations = Lists.newArrayListWithExpectedSize(mutationsMap.size());
mutations = originalMutations;
}
// The replay mode is read from the first operation only and applied to the whole batch.
Mutation firstMutation = miniBatchOp.getOperation(0);
ReplayWrite replayWrite = this.builder.getReplayWrite(firstMutation);
boolean resetTimeStamp = replayWrite == null;
// One timestamp is captured here and reused for every cell that is re-stamped below.
long now = EnvironmentEdgeManager.currentTimeMillis();
byte[] byteNow = Bytes.toBytes(now);
for (int i = 0; i < miniBatchOp.size(); i++) {
Mutation m = miniBatchOp.getOperation(i);
// Only handle operations that were not marked IGNORE above and that the builder reports
// as enabled.
if (miniBatchOp.getOperationStatus(i) != IGNORE && this.builder.isEnabled(m)) {
if (resetTimeStamp) {
// Not a replay: pass every cell of the mutation to setTimeStamp with the shared
// timestamp (presumably this overwrites the cell timestamp -- confirm in setTimeStamp).
for (List<Cell> family : m.getFamilyCellMap().values()) {
List<KeyValue> familyKVs = KeyValueUtil.ensureKeyValues(family);
for (KeyValue kv : familyKVs) {
setTimeStamp(kv, byteNow);
}
}
}
// For an INDEX_ONLY replay the operation is marked NOWRITE: the data rows are already
// written and are just being replayed, so only the index is of interest.
if (replayWrite == ReplayWrite.INDEX_ONLY) {
miniBatchOp.setOperationStatus(i, NOWRITE);
}
// Merge all mutations for one row into a single MultiMutation (for example a Put and a
// Delete mutation for the same row).
if (copyMutations) {
// Add the mutation to the batch set
ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
MultiMutation stored = mutationsMap.get(row);
// we haven't seen this row before, so add it
if (stored == null) {
stored = new MultiMutation(row);
mutationsMap.put(row, stored);
}
stored.addAll(m);
} else {
originalMutations.add(m);
}
}
}
// dump all the index updates into a single WAL. They will get combined in the end anyways, so
// don't worry which one we get
WALEdit edit = miniBatchOp.getWalEdit(0);
if (edit == null) {
edit = new WALEdit();
miniBatchOp.setWalEdit(0, edit);
}
if (copyMutations || replayWrite != null) {
mutations = IndexManagementUtil.flattenMutationsByTimestamp(mutations);
}
// get the current span, or just use a null-span to avoid a bunch of if statements
try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
Span current = scope.getSpan();
if (current == null) {
current = NullSpan.INSTANCE;
}
long start = EnvironmentEdgeManager.currentTimeMillis();
// get the index updates for all elements in this batch
Collection<Pair<Mutation, byte[]>> indexUpdates = this.builder.getIndexUpdate(miniBatchOp, mutations);
long duration = EnvironmentEdgeManager.currentTimeMillis() - start;
// The slow-call counter is bumped regardless of log level; only the message is debug-gated.
if (duration >= slowIndexPrepareThreshold) {
if (LOG.isDebugEnabled()) {
LOG.debug(getCallTooSlowMessage("indexPrepare", duration, slowIndexPrepareThreshold));
}
metricSource.incrementNumSlowIndexPrepareCalls();
}
metricSource.updateIndexPrepareTime(duration);
current.addTimelineAnnotation("Built index updates, doing preStep");
TracingUtils.addAnnotation(current, "index update count", indexUpdates.size());
// NOTE(review): the region is null-checked at the top of this method but dereferenced
// unconditionally here -- confirm it can never be null by this point.
byte[] tableName = c.getEnvironment().getRegion().getTableDesc().getTableName().getName();
// Split off the updates whose target table name equals this region's table name; they are
// removed from indexUpdates and added to the batch itself.
Iterator<Pair<Mutation, byte[]>> indexUpdatesItr = indexUpdates.iterator();
List<Mutation> localUpdates = new ArrayList<Mutation>(indexUpdates.size());
while (indexUpdatesItr.hasNext()) {
Pair<Mutation, byte[]> next = indexUpdatesItr.next();
if (Bytes.compareTo(next.getSecond(), tableName) == 0) {
localUpdates.add(next.getFirst());
indexUpdatesItr.remove();
}
}
if (!localUpdates.isEmpty()) {
miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
}
// Whatever remains targets other tables: keep it on the context for later processing.
if (!indexUpdates.isEmpty()) {
context.indexUpdates = indexUpdates;
// write index updates to WAL
if (durability != Durability.SKIP_WAL) {
// we have all the WAL durability, so we just update the WAL entry and move on
for (Pair<Mutation, byte[]> entry : indexUpdates) {
edit.add(new IndexedKeyValue(entry.getSecond(), entry.getFirst()));
}
}
}
}
}
use of org.apache.hadoop.hbase.client.Durability in project hbase by apache.
the class RSRpcServices method replay.
/**
 * Replay the given changes when distributedLogReplay WAL edits from a failed RS. The guarantee is
 * that the given mutations will be durable on the receiving RS if this method returns without any
 * exception.
 *
 * <p>All entries in the request must belong to the same region as the first entry; otherwise the
 * request is rejected. Coprocessor WAL-restore hooks are only invoked when the target region is
 * the default replica, and WAL writes are skipped (SKIP_WAL) when it is not.
 *
 * @param controller the RPC controller
 * @param request the request
 * @return an empty response, both for an empty request and after a successful replay
 * @throws ServiceException wrapping any IOException raised while replaying, including a
 *         NotServingRegionException when the request mixes entries from several regions
 * @deprecated Since 3.0.0, will be removed in 4.0.0. Not used any more, put here only for
 *             compatibility with old region replica implementation. Now we will use
 *             {@code replicateToReplica} method instead.
 */
@Deprecated
@Override
@QosPriority(priority = HConstants.REPLAY_QOS)
public ReplicateWALEntryResponse replay(final RpcController controller, final ReplicateWALEntryRequest request) throws ServiceException {
  long before = EnvironmentEdgeManager.currentTime();
  CellScanner cells = getAndReset(controller);
  try {
    checkOpen();
    List<WALEntry> entries = request.getEntryList();
    if (entries == null || entries.isEmpty()) {
      // empty input
      return ReplicateWALEntryResponse.newBuilder().build();
    }
    // The first entry decides which region the whole request is replayed into.
    ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
    HRegion region = server.getRegionByEncodedName(regionName.toStringUtf8());
    RegionCoprocessorHost coprocessorHost = ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo()) ? region.getCoprocessorHost() : // do not invoke coprocessors if this is a secondary region replica
      null;
    List<Pair<WALKey, WALEdit>> walEntries = new ArrayList<>();
    // Skip adding the edits to WAL if this is a secondary region replica
    boolean isPrimary = RegionReplicaUtil.isDefaultReplica(region.getRegionInfo());
    Durability durability = isPrimary ? Durability.USE_DEFAULT : Durability.SKIP_WAL;
    for (WALEntry entry : entries) {
      if (!regionName.equals(entry.getKey().getEncodedRegionName())) {
        // Decode the other region's name the same way as the first one; concatenating the raw
        // ByteString would print the object's toString() rather than the region name.
        throw new NotServingRegionException("Replay request contains entries from multiple " + "regions. First region:" + regionName.toStringUtf8() + " , other region:" + entry.getKey().getEncodedRegionName().toStringUtf8());
      }
      if (server.nonceManager != null && isPrimary) {
        long nonceGroup = entry.getKey().hasNonceGroup() ? entry.getKey().getNonceGroup() : HConstants.NO_NONCE;
        long nonce = entry.getKey().hasNonce() ? entry.getKey().getNonce() : HConstants.NO_NONCE;
        server.nonceManager.reportOperationFromWal(nonceGroup, nonce, entry.getKey().getWriteTime());
      }
      // The WALKey/WALEdit pair is only materialized when a coprocessor host will consume it.
      Pair<WALKey, WALEdit> walEntry = (coprocessorHost == null) ? null : new Pair<>();
      List<MutationReplay> edits = WALSplitUtil.getMutationsFromWALEntry(entry, cells, walEntry, durability);
      if (coprocessorHost != null) {
        // The coprocessor pre-hook is invoked once per WAL entry (key + edit), not per KeyValue.
        if (coprocessorHost.preWALRestore(region.getRegionInfo(), walEntry.getFirst(), walEntry.getSecond())) {
          // if bypass this log entry, ignore it ...
          continue;
        }
        // Remembered so the matching post-hook can run after the WAL sync below.
        walEntries.add(walEntry);
      }
      if (edits != null && !edits.isEmpty()) {
        // HBASE-17924
        // sort to improve lock efficiency
        Collections.sort(edits, (v1, v2) -> Row.COMPARATOR.compare(v1.mutation, v2.mutation));
        // Prefer the original sequence number when the entry carries one.
        long replaySeqId = (entry.getKey().hasOrigSequenceNumber()) ? entry.getKey().getOrigSequenceNumber() : entry.getKey().getLogSequenceNumber();
        OperationStatus[] result = doReplayBatchOp(region, edits, replaySeqId);
        // check if it's a partial success
        for (int i = 0; result != null && i < result.length; i++) {
          if (result[i] != OperationStatus.SUCCESS) {
            throw new IOException(result[i].getExceptionMsg());
          }
        }
      }
    }
    // sync wal at the end because ASYNC_WAL is used above
    // NOTE(review): the durability chosen in this method is USE_DEFAULT or SKIP_WAL; presumably
    // the replay batch path applies ASYNC_WAL itself -- confirm in doReplayBatchOp.
    WAL wal = region.getWAL();
    if (wal != null) {
      wal.sync();
    }
    if (coprocessorHost != null) {
      for (Pair<WALKey, WALEdit> entry : walEntries) {
        coprocessorHost.postWALRestore(region.getRegionInfo(), entry.getFirst(), entry.getSecond());
      }
    }
    return ReplicateWALEntryResponse.newBuilder().build();
  } catch (IOException ie) {
    throw new ServiceException(ie);
  } finally {
    // Replay latency is recorded for failed calls as well as successful ones.
    final MetricsRegionServer metricsRegionServer = server.getMetrics();
    if (metricsRegionServer != null) {
      metricsRegionServer.updateReplay(EnvironmentEdgeManager.currentTime() - before);
    }
  }
}
use of org.apache.hadoop.hbase.client.Durability in project hbase by apache.
the class LoadTestTool method initTestTable.
/**
 * Creates the pre-split load test table and then applies the column family options to it.
 *
 * <p>The table is created with ASYNC_WAL durability when <code>deferredLogFlush</code> is set,
 * and with USE_DEFAULT otherwise.
 *
 * @throws IOException if table creation or applying the column family options fails
 */
public void initTestTable() throws IOException {
  final Durability tableDurability = deferredLogFlush ? Durability.ASYNC_WAL : Durability.USE_DEFAULT;
  HBaseTestingUtil.createPreSplitLoadTestTable(
      conf,
      tableName,
      getColumnFamilies(),
      compressAlgo,
      dataBlockEncodingAlgo,
      numRegionsPerServer,
      regionReplication,
      tableDurability);
  applyColumnFamilyOptions(tableName, getColumnFamilies());
}
use of org.apache.hadoop.hbase.client.Durability in project metron by apache.
the class HBaseBolt method save.
/**
 * Queues the HBase mutation derived from a tuple and registers the tuple with the batch.
 *
 * <p>The row key, columns and optional TTL all come from the mapper. The mutation is added with
 * SYNC_WAL durability when <code>writeToWAL</code> is set and SKIP_WAL otherwise; the TTL is
 * passed along only when the mapper supplies one.
 *
 * @param tuple Contains the data elements that need written to HBase.
 */
private void save(Tuple tuple) {
  final byte[] key = mapper.rowKey(tuple);
  final ColumnList columns = mapper.columns(tuple);

  final Durability walMode;
  if (writeToWAL) {
    walMode = Durability.SYNC_WAL;
  } else {
    walMode = Durability.SKIP_WAL;
  }

  final Optional<Long> timeToLive = mapper.getTTL(tuple);
  if (!timeToLive.isPresent()) {
    hbaseClient.addMutation(key, columns, walMode);
  } else {
    hbaseClient.addMutation(key, columns, walMode, timeToLive.get());
  }

  batchHelper.addBatch(tuple);
  LOG.debug("Added mutation to the batch; size={}", batchHelper.getBatchSize());
}
Aggregations