Use of org.apache.hadoop.hbase.regionserver.Region.RowLock in project hbase by Apache.
From the class TestHRegion, method testBatchPut_whileMultipleRowLocksHeld.
@Test
public void testBatchPut_whileMultipleRowLocksHeld() throws Exception {
byte[] cf = Bytes.toBytes(COLUMN_FAMILY);
byte[] qual = Bytes.toBytes("qual");
byte[] val = Bytes.toBytes("val");
this.region = initHRegion(tableName, method, CONF, cf);
MetricsWALSource source = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class);
try {
long syncs = metricsAssertHelper.getCounter("syncTimeNumOps", source);
metricsAssertHelper.assertCounter("syncTimeNumOps", syncs, source);
final Put[] puts = new Put[10];
for (int i = 0; i < 10; i++) {
puts[i] = new Put(Bytes.toBytes("row_" + i));
puts[i].addColumn(cf, qual, val);
}
puts[5].addColumn(Bytes.toBytes("BAD_CF"), qual, val);
LOG.info("batchPut will have to break into four batches to avoid row locks");
RowLock rowLock1 = region.getRowLock(Bytes.toBytes("row_2"));
RowLock rowLock2 = region.getRowLock(Bytes.toBytes("row_1"));
RowLock rowLock3 = region.getRowLock(Bytes.toBytes("row_3"));
RowLock rowLock4 = region.getRowLock(Bytes.toBytes("row_3"), true);
MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(CONF);
final AtomicReference<OperationStatus[]> retFromThread = new AtomicReference<>();
final CountDownLatch startingPuts = new CountDownLatch(1);
final CountDownLatch startingClose = new CountDownLatch(1);
TestThread putter = new TestThread(ctx) {
@Override
public void doWork() throws IOException {
startingPuts.countDown();
retFromThread.set(region.batchMutate(puts));
}
};
LOG.info("...starting put thread while holding locks");
ctx.addThread(putter);
ctx.startThreads();
// Now attempt to close the region from another thread. Prior to HBASE-12565
// this would cause the in-progress batchMutate operation to fail with an
// exception, because it used to release and re-acquire the close-guard lock
// between batches. The caller then didn't get status indicating which writes succeeded.
// We now expect this thread to block until the batchMutate call finishes.
Thread regionCloseThread = new TestThread(ctx) {
@Override
public void doWork() {
try {
startingPuts.await();
// Give some time for the batch mutate to get in.
// We don't want to race with the mutate
Thread.sleep(10);
startingClose.countDown();
HBaseTestingUtility.closeRegionAndWAL(region);
} catch (IOException e) {
throw new RuntimeException(e);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
};
regionCloseThread.start();
startingClose.await();
startingPuts.await();
Thread.sleep(100);
LOG.info("...releasing row lock 1, which should let put thread continue");
rowLock1.release();
rowLock2.release();
rowLock3.release();
waitForCounter(source, "syncTimeNumOps", syncs + 1);
LOG.info("...joining on put thread");
ctx.stop();
regionCloseThread.join();
OperationStatus[] codes = retFromThread.get();
for (int i = 0; i < codes.length; i++) {
assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY : OperationStatusCode.SUCCESS, codes[i].getOperationStatusCode());
}
rowLock4.release();
} finally {
HBaseTestingUtility.closeRegionAndWAL(this.region);
this.region = null;
}
}
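The test above exercises this contract under contention: several row locks are taken up front, and the concurrent batchMutate can only finish once they are released. As a minimal sketch of the plain acquire/use/release pattern the test relies on (the helper name and row key are hypothetical and not part of the test; Region, RowLock, Bytes and IOException are the same types imported by the test):
// Hypothetical helper: take a row lock, work on the row, and always release it.
public static void withRowLock(Region region, String rowKey) throws IOException {
    byte[] row = Bytes.toBytes(rowKey);
    // One-arg form, as used in the test above; in recent HBase versions this is an exclusive lock.
    RowLock lock = region.getRowLock(row);
    try {
        // Mutate or read the row here while the lock is held.
    } finally {
        // Forgetting this leaves later writers (and a region close) waiting, as the test demonstrates.
        lock.release();
    }
}
The two-arg form region.getRowLock(row, true), used for rowLock4 above, requests a shared (read) lock in recent HBase versions.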
Use of org.apache.hadoop.hbase.regionserver.Region.RowLock in project hbase by Apache.
From the class TestHRegion, method testBatchMutateWithWrongRegionException.
@Test
public void testBatchMutateWithWrongRegionException() throws Exception {
final byte[] a = Bytes.toBytes("a");
final byte[] b = Bytes.toBytes("b");
final byte[] c = Bytes.toBytes("c"); // exclusive end key of the region
int prevLockTimeout = CONF.getInt("hbase.rowlock.wait.duration", 30000);
CONF.setInt("hbase.rowlock.wait.duration", 1000);
final HRegion region = initHRegion(tableName, a, c, method, CONF, false, fam1);
Mutation[] mutations = new Mutation[] { new Put(a).addImmutable(fam1, null, null),
new Put(c).addImmutable(fam1, null, null), // this one is outside the region boundary [a, c)
new Put(b).addImmutable(fam1, null, null) };
OperationStatus[] status = region.batchMutate(mutations);
assertEquals(status[0].getOperationStatusCode(), OperationStatusCode.SUCCESS);
assertEquals(status[1].getOperationStatusCode(), OperationStatusCode.SANITY_CHECK_FAILURE);
assertEquals(status[2].getOperationStatusCode(), OperationStatusCode.SUCCESS);
// test with a row lock held for a long time
final CountDownLatch obtainedRowLock = new CountDownLatch(1);
ExecutorService exec = Executors.newFixedThreadPool(2);
Future<Void> f1 = exec.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
LOG.info("Acquiring row lock");
RowLock rl = region.getRowLock(b);
obtainedRowLock.countDown();
LOG.info("Waiting for 5 seconds before releasing lock");
Threads.sleep(5000);
LOG.info("Releasing row lock");
rl.release();
return null;
}
});
obtainedRowLock.await(30, TimeUnit.SECONDS);
Future<Void> f2 = exec.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
Mutation[] mutations = new Mutation[] { new Put(a).addImmutable(fam1, null, null), new Put(b).addImmutable(fam1, null, null) };
// this will wait for the row lock, and it will eventually succeed
OperationStatus[] status = region.batchMutate(mutations);
assertEquals(status[0].getOperationStatusCode(), OperationStatusCode.SUCCESS);
assertEquals(status[1].getOperationStatusCode(), OperationStatusCode.SUCCESS);
return null;
}
});
f1.get();
f2.get();
CONF.setInt("hbase.rowlock.wait.duration", prevLockTimeout);
}
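The test above temporarily lowers hbase.rowlock.wait.duration, the time a single row-lock acquisition is allowed to wait, and restores the previous value when it is done. A sketch of that pattern pulled out into a helper; the method name is hypothetical, and the setting only matters if it is applied to the Configuration before the region is opened:
// Hypothetical test helper: run a body with a short row-lock wait, then restore the old value.
static void runWithShortRowLockTimeout(Configuration conf, Runnable body) {
    int previous = conf.getInt("hbase.rowlock.wait.duration", 30000);
    conf.setInt("hbase.rowlock.wait.duration", 1000); // contended lock acquisitions now give up after ~1s
    try {
        body.run(); // e.g. open a test region from conf and issue the contended batchMutate calls
    } finally {
        conf.setInt("hbase.rowlock.wait.duration", previous); // restore so later tests see the original value
    }
}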
Use of org.apache.hadoop.hbase.regionserver.Region.RowLock in project phoenix by Apache.
From the class SequenceRegionObserver, method preIncrement.
/**
* Use PreIncrement hook of BaseRegionObserver to overcome deficiencies in Increment
* implementation (HBASE-10254):
* 1) Lack of recognition and identification of when the key value to increment doesn't exist
* 2) Lack of the ability to set the timestamp of the updated key value.
* Works the same as existing region.increment(), except assumes there is a single column to
* increment and uses Phoenix LONG encoding.
*
* @since 3.0.0
*/
@Override
public Result preIncrement(final ObserverContext<RegionCoprocessorEnvironment> e, final Increment increment) throws IOException {
RegionCoprocessorEnvironment env = e.getEnvironment();
// We need to set this to prevent region.increment from being called
e.bypass();
e.complete();
Region region = env.getRegion();
byte[] row = increment.getRow();
List<RowLock> locks = Lists.newArrayList();
TimeRange tr = increment.getTimeRange();
region.startRegionOperation();
try {
acquireLock(region, row, locks);
try {
long maxTimestamp = tr.getMax();
boolean validateOnly = true;
Get get = new Get(row);
get.setTimeRange(tr.getMin(), tr.getMax());
for (Map.Entry<byte[], List<Cell>> entry : increment.getFamilyCellMap().entrySet()) {
byte[] cf = entry.getKey();
for (Cell cq : entry.getValue()) {
long value = Bytes.toLong(cq.getValueArray(), cq.getValueOffset());
get.addColumn(cf, CellUtil.cloneQualifier(cq));
long cellTimestamp = cq.getTimestamp();
// If a Cell carries an explicit timestamp earlier than the current maximum,
// use that timestamp as the upper bound of the Get's time range.
if (cellTimestamp > 0 && cellTimestamp < maxTimestamp) {
maxTimestamp = cellTimestamp;
get.setTimeRange(MetaDataProtocol.MIN_TABLE_TIMESTAMP, maxTimestamp);
}
validateOnly &= (Sequence.ValueOp.VALIDATE_SEQUENCE.ordinal() == value);
}
}
Result result = region.get(get);
if (result.isEmpty()) {
return getErrorResult(row, maxTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode());
}
KeyValue currentValueKV = Sequence.getCurrentValueKV(result);
KeyValue incrementByKV = Sequence.getIncrementByKV(result);
KeyValue cacheSizeKV = Sequence.getCacheSizeKV(result);
long currentValue = PLong.INSTANCE.getCodec().decodeLong(currentValueKV.getValueArray(), currentValueKV.getValueOffset(), SortOrder.getDefault());
long incrementBy = PLong.INSTANCE.getCodec().decodeLong(incrementByKV.getValueArray(), incrementByKV.getValueOffset(), SortOrder.getDefault());
long cacheSize = PLong.INSTANCE.getCodec().decodeLong(cacheSizeKV.getValueArray(), cacheSizeKV.getValueOffset(), SortOrder.getDefault());
// Hold timestamp constant for sequences, so that clients always only see the latest
// value regardless of when they connect.
long timestamp = currentValueKV.getTimestamp();
Put put = new Put(row, timestamp);
int numIncrementKVs = increment.getFamilyCellMap().get(PhoenixDatabaseMetaData.SYSTEM_SEQUENCE_FAMILY_BYTES).size();
// creates the list of KeyValues used for the Result that will be returned
List<Cell> cells = Sequence.getCells(result, numIncrementKVs);
// if the client is 3.0/4.0, preserve the old behavior (older clients won't have the newer columns present in the increment)
if (numIncrementKVs != Sequence.NUM_SEQUENCE_KEY_VALUES) {
currentValue += incrementBy * cacheSize;
// Hold timestamp constant for sequences, so that clients always only see the latest value
// regardless of when they connect.
KeyValue newCurrentValueKV = createKeyValue(row, PhoenixDatabaseMetaData.CURRENT_VALUE_BYTES, currentValue, timestamp);
put.add(newCurrentValueKV);
Sequence.replaceCurrentValueKV(cells, newCurrentValueKV);
} else {
KeyValue cycleKV = Sequence.getCycleKV(result);
KeyValue limitReachedKV = Sequence.getLimitReachedKV(result);
KeyValue minValueKV = Sequence.getMinValueKV(result);
KeyValue maxValueKV = Sequence.getMaxValueKV(result);
boolean increasingSeq = incrementBy > 0;
// if minValue, maxValue, cycle and limitReached are null, this sequence has been upgraded from
// a lower version. Set minValue, maxValue, cycle and limitReached to Long.MIN_VALUE, Long.MAX_VALUE, false and false
// respectively in order to maintain existing behavior and also update the KeyValues on the server
boolean limitReached;
if (limitReachedKV == null) {
limitReached = false;
KeyValue newLimitReachedKV = createKeyValue(row, PhoenixDatabaseMetaData.LIMIT_REACHED_FLAG_BYTES, limitReached, timestamp);
put.add(newLimitReachedKV);
Sequence.replaceLimitReachedKV(cells, newLimitReachedKV);
} else {
limitReached = (Boolean) PBoolean.INSTANCE.toObject(limitReachedKV.getValueArray(), limitReachedKV.getValueOffset(), limitReachedKV.getValueLength());
}
long minValue;
if (minValueKV == null) {
minValue = Long.MIN_VALUE;
KeyValue newMinValueKV = createKeyValue(row, PhoenixDatabaseMetaData.MIN_VALUE_BYTES, minValue, timestamp);
put.add(newMinValueKV);
Sequence.replaceMinValueKV(cells, newMinValueKV);
} else {
minValue = PLong.INSTANCE.getCodec().decodeLong(minValueKV.getValueArray(), minValueKV.getValueOffset(), SortOrder.getDefault());
}
long maxValue;
if (maxValueKV == null) {
maxValue = Long.MAX_VALUE;
KeyValue newMaxValueKV = createKeyValue(row, PhoenixDatabaseMetaData.MAX_VALUE_BYTES, maxValue, timestamp);
put.add(newMaxValueKV);
Sequence.replaceMaxValueKV(cells, newMaxValueKV);
} else {
maxValue = PLong.INSTANCE.getCodec().decodeLong(maxValueKV.getValueArray(), maxValueKV.getValueOffset(), SortOrder.getDefault());
}
boolean cycle;
if (cycleKV == null) {
cycle = false;
KeyValue newCycleKV = createKeyValue(row, PhoenixDatabaseMetaData.CYCLE_FLAG_BYTES, cycle, timestamp);
put.add(newCycleKV);
Sequence.replaceCycleValueKV(cells, newCycleKV);
} else {
cycle = (Boolean) PBoolean.INSTANCE.toObject(cycleKV.getValueArray(), cycleKV.getValueOffset(), cycleKV.getValueLength());
}
long numSlotsToAllocate = calculateNumSlotsToAllocate(increment);
// We don't support Bulk Allocations on sequences that have the CYCLE flag set to true
if (cycle && !SequenceUtil.isCycleAllowed(numSlotsToAllocate)) {
return getErrorResult(row, maxTimestamp, SQLExceptionCode.NUM_SEQ_TO_ALLOCATE_NOT_SUPPORTED.getErrorCode());
}
// Bulk Allocations are expressed by NEXT <n> VALUES FOR
if (SequenceUtil.isBulkAllocation(numSlotsToAllocate)) {
if (SequenceUtil.checkIfLimitReached(currentValue, minValue, maxValue, incrementBy, cacheSize, numSlotsToAllocate)) {
// Return an error if the sequence cannot supply all the slots requested.
return getErrorResult(row, maxTimestamp, SequenceUtil.getLimitReachedErrorCode(increasingSeq).getErrorCode());
}
}
if (validateOnly) {
return result;
}
// return if we have run out of sequence values
if (limitReached) {
if (cycle) {
// reset currentValue of the Sequence row to minValue/maxValue
currentValue = increasingSeq ? minValue : maxValue;
} else {
return getErrorResult(row, maxTimestamp, SequenceUtil.getLimitReachedErrorCode(increasingSeq).getErrorCode());
}
}
// check if the limit was reached
limitReached = SequenceUtil.checkIfLimitReached(currentValue, minValue, maxValue, incrementBy, cacheSize, numSlotsToAllocate);
// update currentValue
currentValue += incrementBy * (SequenceUtil.isBulkAllocation(numSlotsToAllocate) ? numSlotsToAllocate : cacheSize);
// update the currentValue of the Result row
KeyValue newCurrentValueKV = createKeyValue(row, PhoenixDatabaseMetaData.CURRENT_VALUE_BYTES, currentValue, timestamp);
Sequence.replaceCurrentValueKV(cells, newCurrentValueKV);
put.add(newCurrentValueKV);
// record whether the limit has now been reached, so that no further values are handed out once it is
KeyValue newLimitReachedKV = createKeyValue(row, PhoenixDatabaseMetaData.LIMIT_REACHED_FLAG_BYTES, limitReached, timestamp);
put.add(newLimitReachedKV);
}
// update the KeyValues on the server
Mutation[] mutations = new Mutation[] { put };
region.batchMutate(mutations, HConstants.NO_NONCE, HConstants.NO_NONCE);
// return a Result with the updated KeyValues
return Result.create(cells);
} finally {
region.releaseRowLocks(locks);
}
} catch (Throwable t) {
ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t);
// Impossible
return null;
} finally {
region.closeRegionOperation();
}
}
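The acquireLock(region, row, locks) helper called at the top of this hook is not shown in the excerpt. Based on how the lock list is used here (locks are collected so the finally block can hand them all to region.releaseRowLocks), a plausible minimal shape is the following; treat it as an assumption rather than the exact Phoenix implementation:
// Assumed shape of the acquireLock helper used by the coprocessor hooks above.
private static void acquireLock(Region region, byte[] key, List<RowLock> locks) throws IOException {
    RowLock rowLock = region.getRowLock(key, false); // false = not a read lock, i.e. exclusive
    if (rowLock == null) {
        throw new IOException("Failed to acquire lock on " + Bytes.toStringBinary(key));
    }
    locks.add(rowLock); // released later via region.releaseRowLocks(locks) in the finally block
}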
Use of org.apache.hadoop.hbase.regionserver.Region.RowLock in project phoenix by Apache.
From the class SequenceRegionObserver, method preAppend.
/**
* Override the preAppend for checkAndPut and checkAndDelete, as we need the ability to
* a) set the TimeRange for the Get being done and
* b) return something back to the client to indicate success/failure
*/
@SuppressWarnings("deprecation")
@Override
public Result preAppend(final ObserverContext<RegionCoprocessorEnvironment> e, final Append append) throws IOException {
byte[] opBuf = append.getAttribute(OPERATION_ATTRIB);
if (opBuf == null) {
return null;
}
Sequence.MetaOp op = Sequence.MetaOp.values()[opBuf[0]];
Cell keyValue = append.getFamilyCellMap().values().iterator().next().iterator().next();
long clientTimestamp = HConstants.LATEST_TIMESTAMP;
long minGetTimestamp = MetaDataProtocol.MIN_TABLE_TIMESTAMP;
long maxGetTimestamp = HConstants.LATEST_TIMESTAMP;
boolean hadClientTimestamp;
byte[] clientTimestampBuf = null;
if (op == Sequence.MetaOp.RETURN_SEQUENCE) {
// When returning sequences, this allows us to send the expected timestamp
// of the sequence to make sure we don't reset any other sequence
hadClientTimestamp = true;
clientTimestamp = minGetTimestamp = keyValue.getTimestamp();
maxGetTimestamp = minGetTimestamp + 1;
} else {
clientTimestampBuf = append.getAttribute(MAX_TIMERANGE_ATTRIB);
if (clientTimestampBuf != null) {
clientTimestamp = maxGetTimestamp = Bytes.toLong(clientTimestampBuf);
}
hadClientTimestamp = (clientTimestamp != HConstants.LATEST_TIMESTAMP);
if (hadClientTimestamp) {
// For CREATE_SEQUENCE, include the client timestamp itself in the Get's time range,
// so an existing sequence created at that exact timestamp is detected.
if (op == Sequence.MetaOp.CREATE_SEQUENCE) {
maxGetTimestamp = clientTimestamp + 1;
}
} else {
clientTimestamp = maxGetTimestamp = EnvironmentEdgeManager.currentTimeMillis();
clientTimestampBuf = Bytes.toBytes(clientTimestamp);
}
}
RegionCoprocessorEnvironment env = e.getEnvironment();
// We need to set this to prevent region.append from being called
e.bypass();
e.complete();
Region region = env.getRegion();
byte[] row = append.getRow();
List<RowLock> locks = Lists.newArrayList();
region.startRegionOperation();
try {
acquireLock(region, row, locks);
try {
byte[] family = CellUtil.cloneFamily(keyValue);
byte[] qualifier = CellUtil.cloneQualifier(keyValue);
Get get = new Get(row);
get.setTimeRange(minGetTimestamp, maxGetTimestamp);
get.addColumn(family, qualifier);
Result result = region.get(get);
if (result.isEmpty()) {
if (op == Sequence.MetaOp.DROP_SEQUENCE || op == Sequence.MetaOp.RETURN_SEQUENCE) {
return getErrorResult(row, clientTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode());
}
} else {
if (op == Sequence.MetaOp.CREATE_SEQUENCE) {
return getErrorResult(row, clientTimestamp, SQLExceptionCode.SEQUENCE_ALREADY_EXIST.getErrorCode());
}
}
Mutation m = null;
switch(op) {
case RETURN_SEQUENCE:
KeyValue currentValueKV = result.raw()[0];
long expectedValue = PLong.INSTANCE.getCodec().decodeLong(append.getAttribute(CURRENT_VALUE_ATTRIB), 0, SortOrder.getDefault());
long value = PLong.INSTANCE.getCodec().decodeLong(currentValueKV.getValueArray(), currentValueKV.getValueOffset(), SortOrder.getDefault());
// Timestamp should match exactly, or we may have the wrong sequence
if (expectedValue != value || currentValueKV.getTimestamp() != clientTimestamp) {
return Result.create(Collections.singletonList((Cell) KeyValueUtil.newKeyValue(row, PhoenixDatabaseMetaData.SYSTEM_SEQUENCE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, currentValueKV.getTimestamp(), ByteUtil.EMPTY_BYTE_ARRAY)));
}
m = new Put(row, currentValueKV.getTimestamp());
m.getFamilyCellMap().putAll(append.getFamilyCellMap());
break;
case DROP_SEQUENCE:
m = new Delete(row, clientTimestamp);
break;
case CREATE_SEQUENCE:
m = new Put(row, clientTimestamp);
m.getFamilyCellMap().putAll(append.getFamilyCellMap());
break;
}
if (!hadClientTimestamp) {
for (List<Cell> kvs : m.getFamilyCellMap().values()) {
for (Cell kv : kvs) {
((KeyValue) kv).updateLatestStamp(clientTimestampBuf);
}
}
}
Mutation[] mutations = new Mutation[] { m };
region.batchMutate(mutations, HConstants.NO_NONCE, HConstants.NO_NONCE);
long serverTimestamp = MetaDataUtil.getClientTimeStamp(m);
// Return a single-cell Result whose timestamp tells the client when the mutation was actually performed.
return Result.create(Collections.singletonList((Cell) KeyValueUtil.newKeyValue(row, PhoenixDatabaseMetaData.SYSTEM_SEQUENCE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, serverTimestamp, SUCCESS_VALUE)));
} finally {
region.releaseRowLocks(locks);
}
} catch (Throwable t) {
ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t);
// Impossible
return null;
} finally {
region.closeRegionOperation();
}
}
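This hook only takes effect when the Append carries the OPERATION_ATTRIB attribute, whose first byte is read back as the ordinal of a Sequence.MetaOp (CREATE_SEQUENCE, DROP_SEQUENCE or RETURN_SEQUENCE). A hedged sketch of the client-side tagging that implies, assuming the attribute constant is accessible to the caller; the helper name is hypothetical:
// Hypothetical client-side helper: tag an Append so the preAppend hook above dispatches on it.
static Append tagSequenceMetaOp(Append append, Sequence.MetaOp op) {
    // The hook reads this back as Sequence.MetaOp.values()[opBuf[0]].
    append.setAttribute(SequenceRegionObserver.OPERATION_ATTRIB, new byte[] { (byte) op.ordinal() });
    return append;
}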
Use of org.apache.hadoop.hbase.regionserver.Region.RowLock in project phoenix by Apache.
From the class MetaDataEndpointImpl, method dropFunction.
@Override
public void dropFunction(RpcController controller, DropFunctionRequest request, RpcCallback<MetaDataResponse> done) {
byte[][] rowKeyMetaData = new byte[2][];
byte[] functionName = null;
try {
List<Mutation> functionMetaData = ProtobufUtil.getMutations(request);
MetaDataUtil.getTenantIdAndFunctionName(functionMetaData, rowKeyMetaData);
byte[] tenantIdBytes = rowKeyMetaData[PhoenixDatabaseMetaData.TENANT_ID_INDEX];
functionName = rowKeyMetaData[PhoenixDatabaseMetaData.FUNTION_NAME_INDEX];
byte[] lockKey = SchemaUtil.getFunctionKey(tenantIdBytes, functionName);
Region region = env.getRegion();
MetaDataMutationResult result = checkFunctionKeyInRegion(lockKey, region);
if (result != null) {
done.run(MetaDataMutationResult.toProto(result));
return;
}
List<RowLock> locks = Lists.newArrayList();
long clientTimeStamp = MetaDataUtil.getClientTimeStamp(functionMetaData);
try {
acquireLock(region, lockKey, locks);
List<byte[]> keys = new ArrayList<byte[]>(1);
keys.add(lockKey);
List<ImmutableBytesPtr> invalidateList = new ArrayList<ImmutableBytesPtr>();
result = doDropFunction(clientTimeStamp, keys, functionMetaData, invalidateList);
if (result.getMutationCode() != MutationCode.FUNCTION_ALREADY_EXISTS) {
done.run(MetaDataMutationResult.toProto(result));
return;
}
region.mutateRowsWithLocks(functionMetaData, Collections.<byte[]>emptySet(), HConstants.NO_NONCE, HConstants.NO_NONCE);
Cache<ImmutableBytesPtr, PMetaDataEntity> metaDataCache = GlobalCache.getInstance(this.env).getMetaDataCache();
long currentTime = MetaDataUtil.getClientTimeStamp(functionMetaData);
for (ImmutableBytesPtr ptr : invalidateList) {
metaDataCache.invalidate(ptr);
metaDataCache.put(ptr, newDeletedFunctionMarker(currentTime));
}
done.run(MetaDataMutationResult.toProto(result));
return;
} finally {
region.releaseRowLocks(locks);
}
} catch (Throwable t) {
logger.error("dropFunction failed", t);
ProtobufUtil.setControllerException(controller, ServerUtil.createIOException(Bytes.toString(functionName), t));
}
}
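Like the sequence hooks above, this endpoint performs all of its metadata writes while a row lock on the lock key is held and releases the lock in a finally block. A condensed, hedged sketch of that lock-then-mutate skeleton (the method name is hypothetical; the endpoint's cache handling and error reporting are omitted):
// Hedged sketch: the lock/mutate/release skeleton the endpoint method above follows.
static void mutateUnderRowLock(Region region, byte[] lockKey, List<Mutation> mutations) throws IOException {
    List<RowLock> locks = Lists.newArrayList();
    try {
        RowLock lock = region.getRowLock(lockKey, false); // exclusive lock on the metadata row key
        if (lock == null) {
            throw new IOException("Failed to acquire lock on " + Bytes.toStringBinary(lockKey));
        }
        locks.add(lock);
        // All writes are applied while the lock is held; no additional per-row locking is requested.
        region.mutateRowsWithLocks(mutations, Collections.<byte[]>emptySet(), HConstants.NO_NONCE, HConstants.NO_NONCE);
    } finally {
        region.releaseRowLocks(locks); // safe even if the lock was never added
    }
}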