use of org.apache.hadoop.hbase.util.NonceKey in project hbase by apache.
the class TestProcedureNonce method testCompletedProcWithSameNonce.
/**
 * Checks that a nonce used for a procedure which already ran to completion still maps to that
 * procedure's id once the executor has been restarted.
 */
@Test
public void testCompletedProcWithSameNonce() throws Exception {
  final long group = 123;
  final long nonceValue = 2222;
  final NonceKey key = procExecutor.createNonceKey(group, nonceValue);

  // First registration of the nonce: the returned value is expected NOT to be >= 0.
  final long firstRegistration = procExecutor.registerNonce(key);
  assertFalse(firstRegistration >= 0);

  // Run a single-step procedure under that nonce and wait until it is done.
  Procedure singleStep = new TestSingleStepProcedure();
  final long procId = procExecutor.submitProcedure(singleStep, key);
  ProcedureTestingUtility.waitProcedure(procExecutor, procId);

  // Bounce the executor and wait on the same procedure id again.
  ProcedureTestingUtility.restart(procExecutor);
  ProcedureTestingUtility.waitProcedure(procExecutor, procId);

  // Registering the same nonce a second time must hand back the original procId.
  final long secondRegistration = procExecutor.registerNonce(key);
  assertEquals(procId, secondRegistration);

  Procedure<?> outcome = procExecutor.getResult(procId);
  ProcedureTestingUtility.assertProcNotFailed(outcome);
}
use of org.apache.hadoop.hbase.util.NonceKey in project hbase by apache.
the class ProcedureExecutor method loadProcedures.
/**
 * Rebuilds the executor's in-memory bookkeeping from the procedures yielded by {@code procIter}
 * and hands the unfinished ones to the scheduler or the timeout executor.
 * <p>
 * The iterator is walked twice. The first pass stores finished procedures in {@code completed},
 * stores unfinished ones in {@code procedures} (creating a {@code rollbackStack} entry for each
 * one that has no parent), counts them per state and records every non-null nonce key. After
 * {@code procIter.reset()} the second pass skips finished procedures, loads each remaining one
 * into the rollback stack of its root and sorts it into a per-state list. Locks are restored
 * before anything is pushed to the timeout executor or the scheduler.
 *
 * @param procIter iterator over the procedures to load; it is reset and traversed a second time
 * @param abortOnCorruption not referenced anywhere in this method body
 * @throws IOException declared by the signature; NOTE(review): presumably raised by the iterator
 *           calls -- confirm against ProcedureIterator
 * @throws UnsupportedOperationException if an unfinished procedure is in the ROLLEDBACK or
 *           INITIALIZING state during the second pass
 */
private void loadProcedures(ProcedureIterator procIter, boolean abortOnCorruption) throws IOException {
// 1. Build the rollback stack
// The per-state counters are only used to pre-size the lists created in step 2.
int runnableCount = 0;
int failedCount = 0;
int waitingCount = 0;
int waitingTimeoutCount = 0;
while (procIter.hasNext()) {
// isNextFinished() must be queried before next() advances the iterator.
boolean finished = procIter.isNextFinished();
@SuppressWarnings("unchecked") Procedure<TEnvironment> proc = procIter.next();
NonceKey nonceKey = proc.getNonceKey();
long procId = proc.getProcId();
if (finished) {
completed.put(proc.getProcId(), new CompletedProcedureRetainer<>(proc));
LOG.debug("Completed {}", proc);
} else {
// Only procedures without a parent get their own rollback stack entry.
if (!proc.hasParent()) {
assert !proc.isFinished() : "unexpected finished procedure";
rollbackStack.put(proc.getProcId(), new RootProcedureState<>());
}
// add the procedure to the map
proc.beforeReplay(getEnvironment());
procedures.put(proc.getProcId(), proc);
switch(proc.getState()) {
case RUNNABLE:
runnableCount++;
break;
case FAILED:
failedCount++;
break;
case WAITING:
waitingCount++;
break;
case WAITING_TIMEOUT:
waitingTimeoutCount++;
break;
default:
break;
}
}
// The nonce is recorded for finished and unfinished procedures alike.
if (nonceKey != null) {
// add the nonce to the map
nonceKeysToProcIdsMap.put(nonceKey, procId);
}
}
// 2. Initialize the stacks: In the old implementation, for procedures in FAILED state, we will
// push it into the ProcedureScheduler directly to execute the rollback. But this does not work
// after we introduce the restore lock stage. For now, when we acquire a xlock, we will remove
// the queue from runQueue in scheduler, and then when a procedure which has lock access, for
// example, a sub procedure of the procedure which has the xlock, is pushed into the scheduler,
// we will add the queue back to let the workers poll from it. The assumption here is that, the
// procedure which has the xlock should have been polled out already, so when loading we can not
// add the procedure to scheduler first and then call acquireLock, since the procedure is still
// in the queue, and since we will remove the queue from runQueue, then no one can poll it out,
// then there is a dead lock
List<Procedure<TEnvironment>> runnableList = new ArrayList<>(runnableCount);
List<Procedure<TEnvironment>> failedList = new ArrayList<>(failedCount);
List<Procedure<TEnvironment>> waitingList = new ArrayList<>(waitingCount);
List<Procedure<TEnvironment>> waitingTimeoutList = new ArrayList<>(waitingTimeoutCount);
// Rewind the iterator for the second pass.
procIter.reset();
while (procIter.hasNext()) {
// Finished procedures were already handled in the first pass.
if (procIter.isNextFinished()) {
procIter.skipNext();
continue;
}
@SuppressWarnings("unchecked") Procedure<TEnvironment> proc = procIter.next();
assert !(proc.isFinished() && !proc.hasParent()) : "unexpected completed proc=" + proc;
LOG.debug("Loading {}", proc);
Long rootProcId = getRootProcedureId(proc);
// The orphan procedures will be passed to handleCorrupted, so add an assert here
assert rootProcId != null;
if (proc.hasParent()) {
Procedure<TEnvironment> parent = procedures.get(proc.getParentProcId());
// Only a child that is not finished bumps the latch of a parent that was loaded.
if (parent != null && !proc.isFinished()) {
parent.incChildrenLatch();
}
}
RootProcedureState<TEnvironment> procStack = rollbackStack.get(rootProcId);
procStack.loadStack(proc);
proc.setRootProcId(rootProcId);
switch(proc.getState()) {
case RUNNABLE:
runnableList.add(proc);
break;
case WAITING:
waitingList.add(proc);
break;
case WAITING_TIMEOUT:
waitingTimeoutList.add(proc);
break;
case FAILED:
failedList.add(proc);
break;
case ROLLEDBACK:
case INITIALIZING:
// These two states are rejected for an unfinished procedure: log and abort loading.
String msg = "Unexpected " + proc.getState() + " state for " + proc;
LOG.error(msg);
throw new UnsupportedOperationException(msg);
default:
break;
}
}
// 3. Check the waiting procedures to see if some of them can be added to runnable.
waitingList.forEach(proc -> {
if (!proc.hasChildren()) {
// Normally, WAITING procedures should be waken by its children. But, there is a case that,
// all the children are successful and before they can wake up their parent procedure, the
// master was killed. So, during recovering the procedures from ProcedureWal, its children
// are not loaded because of their SUCCESS state. So we need to continue to run this WAITING
// procedure. But before executing, we need to set its state to RUNNABLE, otherwise, a
// exception will throw:
// Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,
// "NOT RUNNABLE! " + procedure.toString());
proc.setState(ProcedureState.RUNNABLE);
// afterReplay is invoked for this procedure in step 6, together with the other runnables.
runnableList.add(proc);
} else {
proc.afterReplay(getEnvironment());
}
});
// 4. restore locks
restoreLocks();
// 5. Push the procedures to the timeout executor
waitingTimeoutList.forEach(proc -> {
proc.afterReplay(getEnvironment());
timeoutExecutor.add(proc);
});
// 6. Push the procedure to the scheduler
// Failed procedures are handed to the scheduler without an afterReplay call.
failedList.forEach(scheduler::addBack);
runnableList.forEach(p -> {
p.afterReplay(getEnvironment());
// The "loaded" notification is only sent for procedures without a parent.
if (!p.hasParent()) {
sendProcedureLoadedNotification(p.getProcId());
}
scheduler.addBack(p);
});
// After all procedures put into the queue, signal the worker threads.
// Otherwise, there is a race condition. See HBASE-21364.
scheduler.signalAll();
}
use of org.apache.hadoop.hbase.util.NonceKey in project hbase by apache.
the class TestModifyTableProcedure method testColumnFamilyAdditionTwiceWithNonce.
/**
 * Submits a ModifyTableProcedure that adds column family "cf2" under a registered nonce, issues
 * an {@code addColumn} for the same family with that same nonce while the procedure is being
 * recovered, and then checks that another {@code addColumn} with a fresh nonce is rejected with
 * {@link InvalidFamilyOperationException}.
 */
@Test
public void testColumnFamilyAdditionTwiceWithNonce() throws Exception {
  final TableName table = TableName.valueOf(name.getMethodName());
  final String cf2 = "cf2";
  final String cf3 = "cf3";
  final byte[] cf2Bytes = Bytes.toBytes(cf2);
  final ProcedureExecutor<MasterProcedureEnv> executor = getMasterProcedureExecutor();

  // Table starts out with families "cf1" and cf3.
  RegionInfo[] createdRegions =
    MasterProcedureTestingUtility.createTable(executor, table, null, "cf1", cf3);
  ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(executor, true);

  // New descriptor: flip the compaction flag and add cf2.
  TableDescriptor originalTd = UTIL.getAdmin().getDescriptor(table);
  TableDescriptor modifiedTd = TableDescriptorBuilder.newBuilder(originalTd)
    .setCompactionEnabled(!originalTd.isCompactionEnabled())
    .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf2))
    .build();

  // Register a nonce for the modification.
  PerClientRandomNonceGenerator nonces = PerClientRandomNonceGenerator.get();
  final long nonceGroup = nonces.getNonceGroup();
  final long newNonce = nonces.newNonce();
  NonceKey nonceKey = new NonceKey(nonceGroup, newNonce);
  executor.registerNonce(nonceKey);

  // Start the Modify procedure && kill the executor
  final long procId = executor.submitProcedure(
    new ModifyTableProcedure(executor.getEnvironment(), modifiedTd), nonceKey);

  // While the procedure is recovered and re-executed, at step 3 replay an addColumn with the
  // very same nonce; the hook reports whether the id it gets back equals the procedure's id.
  MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(executor, procId, new StepHook() {
    @Override
    public boolean execute(int step) throws IOException {
      return step != 3
        || procId == UTIL.getHBaseCluster().getMaster().addColumn(table,
          ColumnFamilyDescriptorBuilder.newBuilder(cf2Bytes).build(), nonceGroup, newNonce);
    }
  });

  // Try with different nonce, now it should fail the checks
  try {
    UTIL.getHBaseCluster().getMaster().addColumn(table,
      ColumnFamilyDescriptorBuilder.newBuilder(cf2Bytes).build(), nonceGroup, nonces.newNonce());
    Assert.fail();
  } catch (InvalidFamilyOperationException expected) {
    // This exception is the outcome the test is looking for.
  }

  // Validate descriptor
  TableDescriptor finalTd = UTIL.getAdmin().getDescriptor(table);
  assertEquals(!originalTd.isCompactionEnabled(), finalTd.isCompactionEnabled());
  assertEquals(3, finalTd.getColumnFamilyCount());
  assertTrue(finalTd.hasColumnFamily(cf2Bytes));
  assertTrue(finalTd.hasColumnFamily(Bytes.toBytes(cf3)));

  // cf2 should be added
  MasterProcedureTestingUtility.validateTableCreation(UTIL.getHBaseCluster().getMaster(), table,
    createdRegions, "cf1", cf2, cf3);
}
use of org.apache.hadoop.hbase.util.NonceKey in project hbase by apache.
the class HBaseAdmin method convert.
/**
 * Builds a {@link ProcedureInfo} from the protobuf form of a procedure.
 * <p>
 * Fields absent from the message are mapped as follows: a nonce equal to
 * {@code HConstants.NO_NONCE} gives a {@code null} nonce key, a missing parent id gives
 * {@code -1}, and a missing exception or result gives {@code null}.
 *
 * @param procProto the protobuf procedure to convert
 * @return the equivalent {@link ProcedureInfo}
 */
private static ProcedureInfo convert(final ProcedureProtos.Procedure procProto) {
  final NonceKey nonceKey = procProto.getNonce() == HConstants.NO_NONCE
    ? null
    : new NonceKey(procProto.getNonceGroup(), procProto.getNonce());

  // The client-side state enum is looked up by the name of the protobuf state.
  final String stateName = procProto.getState().name();
  final org.apache.hadoop.hbase.ProcedureState procedureState =
    org.apache.hadoop.hbase.ProcedureState.valueOf(stateName);

  long parentId = -1;
  if (procProto.hasParentId()) {
    parentId = procProto.getParentId();
  }

  byte[] result = null;
  if (procProto.hasResult()) {
    result = procProto.getResult().toByteArray();
  }

  return new ProcedureInfo(
    procProto.getProcId(),
    procProto.getClassName(),
    procProto.getOwner(),
    procedureState,
    parentId,
    nonceKey,
    procProto.hasException() ? ForeignExceptionUtil.toIOException(procProto.getException()) : null,
    procProto.getLastUpdate(),
    procProto.getStartTime(),
    result);
}
use of org.apache.hadoop.hbase.util.NonceKey in project hbase by apache.
the class HRegion method doMiniBatchMutate.
/**
 * Called to do a piece of the batch that came in to {@link #batchMutate(Mutation[])}
 * In here we also handle replay of edits on region recover. Also gets change in size brought
 * about by applying {@code batchOp}.
 * <p>
 * Whether or not the mini-batch succeeds, the finally block records a status in
 * {@code batchOp.retCodeDetails} for each visited operation up to the mini-batch's last index
 * and advances {@code batchOp.nextIndexToProcess} to that index.
 *
 * @param batchOp the batch being processed; one mini-batch of it is handled per call
 * @throws IOException declared by the signature; NOTE(review): presumably propagated from row
 *           locking, the WAL append or the memstore write -- confirm against the callees
 */
private void doMiniBatchMutate(BatchOperation<?> batchOp) throws IOException {
boolean success = false;
WALEdit walEdit = null;
WriteEntry writeEntry = null;
boolean locked = false;
// We try to set up a batch in the range [batchOp.nextIndexToProcess,lastIndexExclusive)
MiniBatchOperationInProgress<Mutation> miniBatchOp = null;
/**
 * Keep track of the locks we hold so we can release them in finally clause
 */
List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.size());
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
checkInterrupt();
try {
// STEP 1. Try to acquire as many locks as we can and build mini-batch of operations with
// locked rows
miniBatchOp = batchOp.lockRowsAndBuildMiniBatch(acquiredRowLocks);
// Ensure we acquire at least one.
if (miniBatchOp.getReadyToWriteCount() <= 0) {
// NoSuchColumnFamily?
// Nothing is ready to write; the finally block below still runs on this early return.
return;
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation. Do it before we take the lock and disable interrupts for
// the WAL append.
checkInterrupt();
lock(this.updatesLock.readLock(), miniBatchOp.getReadyToWriteCount());
// Remember that the read lock is held so the finally block knows to release it.
locked = true;
// From this point until memstore update this operation should not be interrupted.
disableInterrupts();
// STEP 2. Update mini batch of all operations in progress with LATEST_TIMESTAMP timestamp
// We should record the timestamp only after we have acquired the rowLock,
// otherwise, newer puts/deletes/increment/append are not guaranteed to have a newer
// timestamp
long now = EnvironmentEdgeManager.currentTime();
batchOp.prepareMiniBatchOperations(miniBatchOp, now, acquiredRowLocks);
// STEP 3. Build WAL edit
List<Pair<NonceKey, WALEdit>> walEdits = batchOp.buildWALEdits(miniBatchOp);
// STEP 4. Append each non-null, non-empty WALEdit to the WAL.
for (Iterator<Pair<NonceKey, WALEdit>> it = walEdits.iterator(); it.hasNext(); ) {
Pair<NonceKey, WALEdit> nonceKeyWALEditPair = it.next();
walEdit = nonceKeyWALEditPair.getSecond();
NonceKey nonceKey = nonceKeyWALEditPair.getFirst();
if (walEdit != null && !walEdit.isEmpty()) {
writeEntry = doWALAppend(walEdit, batchOp.durability, batchOp.getClusterIds(), now, nonceKey.getNonceGroup(), nonceKey.getNonce(), batchOp.getOrigLogSeqNum());
}
// Complete mvcc for all but last writeEntry (for replay case)
if (it.hasNext() && writeEntry != null) {
mvcc.complete(writeEntry);
writeEntry = null;
}
}
// STEP 5. Write back to memStore
// NOTE: writeEntry can be null here
writeEntry = batchOp.writeMiniBatchOperationsToMemStore(miniBatchOp, writeEntry);
// STEP 6. Complete MiniBatchOperations: If required calls postBatchMutate() CP hook and
// complete mvcc for last writeEntry
batchOp.completeMiniBatchOperations(miniBatchOp, writeEntry);
// Cleared so the finally block does not complete the same entry a second time.
writeEntry = null;
success = true;
} finally {
// Call complete rather than completeAndWait because we probably had error if
// writeEntry != null
if (writeEntry != null)
mvcc.complete(writeEntry);
if (locked) {
this.updatesLock.readLock().unlock();
}
releaseRowLocks(acquiredRowLocks);
// Called on every exit path, including those that never reached disableInterrupts().
enableInterrupts();
// miniBatchOp is still null if building the mini-batch threw; fall back to the full batch size.
final int finalLastIndexExclusive = miniBatchOp != null ? miniBatchOp.getLastIndexExclusive() : batchOp.size();
final boolean finalSuccess = success;
batchOp.visitBatchOperations(true, finalLastIndexExclusive, (int i) -> {
Mutation mutation = batchOp.getMutation(i);
if (mutation instanceof Increment || mutation instanceof Append) {
// On success, Increment and Append report their entry from batchOp.results with the status.
if (finalSuccess) {
batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.SUCCESS, batchOp.results[i]);
} else {
batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
}
} else {
batchOp.retCodeDetails[i] = finalSuccess ? OperationStatus.SUCCESS : OperationStatus.FAILURE;
}
return true;
});
// walEdit here is the last edit taken from the STEP 4 loop, or null if the loop never ran.
batchOp.doPostOpCleanupForMiniBatch(miniBatchOp, walEdit, finalSuccess);
batchOp.nextIndexToProcess = finalLastIndexExclusive;
}
}
Aggregations