Example 6 with NonceKey

In the class TestProcedureNonce, method testCompletedProcWithSameNonce:

/**
 * Checks that registering a nonce again, after the procedure submitted with that nonce has been
 * waited on, returns the id of that earlier procedure.
 *
 * NOTE(review): this listing looks abridged — the method's closing brace is not shown and some
 * statements may have been dropped, so read the comments below against the full source.
 */
public void testCompletedProcWithSameNonce() throws Exception {
    final long nonceGroup = 123;
    final long nonce = 2222;
    // register the nonce
    final NonceKey nonceKey = procExecutor.createNonceKey(nonceGroup, nonce);
    // The first registration is asserted to return a negative value (presumably meaning that no
    // procedure is associated with this nonce yet — confirm against registerNonce's contract).
    assertFalse(procExecutor.registerNonce(nonceKey) >= 0);
    // Submit a proc and wait for its completion
    Procedure proc = new TestSingleStepProcedure();
    long procId = procExecutor.submitProcedure(proc, nonceKey);
    ProcedureTestingUtility.waitProcedure(procExecutor, procId);
    // Restart
    // NOTE(review): no restart call is visible in this excerpt; only a second wait on the same
    // procId follows.
    ProcedureTestingUtility.waitProcedure(procExecutor, procId);
    // try to register a procedure with the same nonce
    // we should get back the old procId
    assertEquals(procId, procExecutor.registerNonce(nonceKey));
    // Fetch the stored result for the procedure; how it is checked is not shown in this excerpt.
    Procedure<?> result = procExecutor.getResult(procId);
Also used : Test(org.junit.Test)

Example 7 with NonceKey

In the class ProcedureExecutor, method loadProcedures:

/**
 * Loads the procedures supplied by {@code procIter} into this executor's in-memory maps
 * ({@code completed}, {@code procedures}, {@code rollbackStack}, {@code nonceKeysToProcIdsMap}).
 *
 * Going by the numbered inline comments, the work is organised in phases: build the rollback
 * stack, initialise the per-state lists, re-check the WAITING procedures, restore locks, push
 * WAITING_TIMEOUT procedures to the timeout executor, and push runnable procedures to the
 * scheduler.
 *
 * NOTE(review): this listing is abridged — closing braces, the bodies of the switch cases and of
 * several if/lambda blocks, and the right-hand side of two assignments are not shown. The
 * comments below describe only what is visible.
 *
 * @param procIter iterator over the procedures to load; it is traversed in two separate loops
 * @param abortOnCorruption not referenced in the visible lines — TODO confirm its use
 * @throws IOException declared by the signature; the statement that throws is not visible here
 */
private void loadProcedures(ProcedureIterator procIter, boolean abortOnCorruption) throws IOException {
    // 1. Build the rollback stack
    // Counters per procedure state; below they are used as initial capacities for the lists.
    int runnableCount = 0;
    int failedCount = 0;
    int waitingCount = 0;
    int waitingTimeoutCount = 0;
    while (procIter.hasNext()) {
        boolean finished = procIter.isNextFinished();
        // NOTE(review): the right-hand side of this assignment is missing in the excerpt.
        @SuppressWarnings("unchecked") Procedure<TEnvironment> proc =;
        NonceKey nonceKey = proc.getNonceKey();
        long procId = proc.getProcId();
        if (finished) {
            // Finished procedures are only recorded in the completed map.
            completed.put(proc.getProcId(), new CompletedProcedureRetainer<>(proc));
            LOG.debug("Completed {}", proc);
        } else {
            if (!proc.hasParent()) {
                // A procedure without a parent gets its own rollback-stack entry.
                assert !proc.isFinished() : "unexpected finished procedure";
                rollbackStack.put(proc.getProcId(), new RootProcedureState<>());
            // add the procedure to the map
            procedures.put(proc.getProcId(), proc);
            // NOTE(review): the case bodies are not shown; presumably each one increments the
            // matching counter declared above — confirm against the full source.
            switch(proc.getState()) {
                case RUNNABLE:
                case FAILED:
                case WAITING:
                case WAITING_TIMEOUT:
        if (nonceKey != null) {
            // add the nonce to the map
            nonceKeysToProcIdsMap.put(nonceKey, procId);
    // 2. Initialize the stacks: In the old implementation, for procedures in FAILED state, we will
    // push it into the ProcedureScheduler directly to execute the rollback. But this does not work
    // after we introduce the restore lock stage. For now, when we acquire a xlock, we will remove
    // the queue from runQueue in scheduler, and then when a procedure which has lock access, for
    // example, a sub procedure of the procedure which has the xlock, is pushed into the scheduler,
    // we will add the queue back to let the workers poll from it. The assumption here is that, the
    // procedure which has the xlock should have been polled out already, so when loading we can not
    // add the procedure to scheduler first and then call acquireLock, since the procedure is still
    // in the queue, and since we will remove the queue from runQueue, then no one can poll it out,
    // then there is a dead lock
    List<Procedure<TEnvironment>> runnableList = new ArrayList<>(runnableCount);
    List<Procedure<TEnvironment>> failedList = new ArrayList<>(failedCount);
    List<Procedure<TEnvironment>> waitingList = new ArrayList<>(waitingCount);
    List<Procedure<TEnvironment>> waitingTimeoutList = new ArrayList<>(waitingTimeoutCount);
    // NOTE(review): the iterator is walked a second time here; whatever rewinds it between the
    // two loops is not visible in this excerpt.
    while (procIter.hasNext()) {
        if (procIter.isNextFinished()) {
        // NOTE(review): the right-hand side of this assignment is missing in the excerpt.
        @SuppressWarnings("unchecked") Procedure<TEnvironment> proc =;
        assert !(proc.isFinished() && !proc.hasParent()) : "unexpected completed proc=" + proc;
        LOG.debug("Loading {}", proc);
        Long rootProcId = getRootProcedureId(proc);
        // The orphan procedures will be passed to handleCorrupted, so add an assert here
        assert rootProcId != null;
        if (proc.hasParent()) {
            Procedure<TEnvironment> parent = procedures.get(proc.getParentProcId());
            if (parent != null && !proc.isFinished()) {
        RootProcedureState<TEnvironment> procStack = rollbackStack.get(rootProcId);
        // Only the final case has a visible body: it rejects the state with an
        // UnsupportedOperationException. The bodies of the other cases are not shown.
        switch(proc.getState()) {
            case RUNNABLE:
            case WAITING:
            case WAITING_TIMEOUT:
            case FAILED:
            case ROLLEDBACK:
            case INITIALIZING:
                String msg = "Unexpected " + proc.getState() + " state for " + proc;
                throw new UnsupportedOperationException(msg);
    // 3. Check the waiting procedures to see if some of them can be added to runnable.
    waitingList.forEach(proc -> {
        if (!proc.hasChildren()) {
            // Normally, WAITING procedures should be woken by their children. But there is a case
            // where all the children are successful and, before they can wake up their parent
            // procedure, the master was killed. So, when recovering the procedures from the
            // ProcedureWal, the children are not loaded because of their SUCCESS state. So we need
            // to continue to run this WAITING procedure. But before executing, we need to set its
            // state to RUNNABLE, otherwise an exception will be thrown:
            // Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,
            // "NOT RUNNABLE! " + procedure.toString());
        } else {
    // 4. restore locks
    // 5. Push the procedures to the timeout executor
    waitingTimeoutList.forEach(proc -> {
    // 6. Push the procedure to the scheduler
    runnableList.forEach(p -> {
        if (!p.hasParent()) {
    // After all procedures put into the queue, signal the worker threads.
    // Otherwise, there is a race condition. See HBASE-21364.
Also used : ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 8 with NonceKey

In the class TestModifyTableProcedure, method testColumnFamilyAdditionTwiceWithNonce:

/**
 * Submits a {@code ModifyTableProcedure} that adds column family {@code cf2} under a nonce, then
 * issues {@code addColumn} for the same family twice: once with the same nonce group and nonce
 * during recovery (the hook compares the returned id with the original procId), and once with a
 * fresh nonce. Finally the resulting table descriptor is validated.
 *
 * NOTE(review): this listing is abridged — the anonymous StepHook, the try/catch and the method
 * itself are not closed in the excerpt, and the catch body is not shown.
 */
public void testColumnFamilyAdditionTwiceWithNonce() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final String cf2 = "cf2";
    final String cf3 = "cf3";
    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
    // create the table
    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(procExec, tableName, null, "cf1", cf3);
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
    // Modify multiple properties of the table.
    TableDescriptor td = UTIL.getAdmin().getDescriptor(tableName);
    // New descriptor: compaction flag flipped and cf2 added.
    TableDescriptor newTd = TableDescriptorBuilder.newBuilder(td).setCompactionEnabled(!td.isCompactionEnabled()).setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf2)).build();
    PerClientRandomNonceGenerator nonceGenerator = PerClientRandomNonceGenerator.get();
    long nonceGroup = nonceGenerator.getNonceGroup();
    long newNonce = nonceGenerator.newNonce();
    NonceKey nonceKey = new NonceKey(nonceGroup, newNonce);
    // Start the Modify procedure && kill the executor
    final long procId = procExec.submitProcedure(new ModifyTableProcedure(procExec.getEnvironment(), newTd), nonceKey);
    // Restart the executor after MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR and try to add the column
    // family; as the nonce is already registered, we should not fail
    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, new StepHook() {

        public boolean execute(int step) throws IOException {
            // At step 3 the hook re-submits addColumn with the same nonce group/nonce and reports
            // whether the returned id equals the id of the procedure submitted above.
            if (step == 3) {
                return procId == UTIL.getHBaseCluster().getMaster().addColumn(tableName, ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(cf2)).build(), nonceGroup, newNonce);
            return true;
    // Try with different nonce, now it should fail the checks
    try {
        UTIL.getHBaseCluster().getMaster().addColumn(tableName, ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(cf2)).build(), nonceGroup, nonceGenerator.newNonce());;
    } catch (InvalidFamilyOperationException e) {
    // Validate descriptor
    TableDescriptor currentHtd = UTIL.getAdmin().getDescriptor(tableName);
    assertEquals(!td.isCompactionEnabled(), currentHtd.isCompactionEnabled());
    // Three families are expected: "cf1" and cf3 from table creation plus the added cf2.
    assertEquals(3, currentHtd.getColumnFamilyCount());
    // cf2 should be added
    MasterProcedureTestingUtility.validateTableCreation(UTIL.getHBaseCluster().getMaster(), tableName, regions, "cf1", cf2, cf3);
Also used : Test(org.junit.Test)

Example 9 with NonceKey

In the class HBaseAdmin, method convert:

/**
 * Builds a {@code ProcedureInfo} from a protobuf {@code ProcedureProtos.Procedure} message.
 *
 * Optional fields are mapped as follows: the nonce key stays {@code null} when the message's
 * nonce equals {@code HConstants.NO_NONCE}; the parent id becomes -1 when absent; the exception
 * and the result become {@code null} when absent.
 *
 * NOTE(review): the closing braces of the {@code if} and of the method are not shown in this
 * excerpt.
 *
 * @param procProto the protobuf procedure message to convert
 * @return the equivalent {@code ProcedureInfo}
 */
private static ProcedureInfo convert(final ProcedureProtos.Procedure procProto) {
    NonceKey nonceKey = null;
    if (procProto.getNonce() != HConstants.NO_NONCE) {
        nonceKey = new NonceKey(procProto.getNonceGroup(), procProto.getNonce());
    // The protobuf state is translated to the client-side enum by constant name.
    org.apache.hadoop.hbase.ProcedureState procedureState = org.apache.hadoop.hbase.ProcedureState.valueOf(procProto.getState().name());
    return new ProcedureInfo(procProto.getProcId(), procProto.getClassName(), procProto.getOwner(), procedureState, procProto.hasParentId() ? procProto.getParentId() : -1, nonceKey, procProto.hasException() ? ForeignExceptionUtil.toIOException(procProto.getException()) : null, procProto.getLastUpdate(), procProto.getStartTime(), procProto.hasResult() ? procProto.getResult().toByteArray() : null);
Also used : NonceKey(org.apache.hadoop.hbase.util.NonceKey) ProcedureInfo(org.apache.hadoop.hbase.ProcedureInfo)

Example 10 with NonceKey

In the class HRegion, method doMiniBatchMutate:

/**
 * Called to do a piece of the batch that came in to {@link #batchMutate(Mutation[])}.
 * In here we also handle replay of edits on region recovery. Also gets the change in size brought
 * about by applying {@code batchOp}.
 */
// NOTE(review): this listing is abridged — closing braces, the bodies of several if/finally
// branches, and the right-hand side of one assignment are not shown. The added comments describe
// only what the visible lines do.
private void doMiniBatchMutate(BatchOperation<?> batchOp) throws IOException {
    // State inspected by the finally clause: whether the batch succeeded, the last WAL edit seen,
    // a write entry that may still be pending, and whether the updates lock was taken.
    boolean success = false;
    WALEdit walEdit = null;
    WriteEntry writeEntry = null;
    boolean locked = false;
    // We try to set up a batch in the range [batchOp.nextIndexToProcess,lastIndexExclusive)
    MiniBatchOperationInProgress<Mutation> miniBatchOp = null;
     * Keep track of the locks we hold so we can release them in finally clause
    List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.size());
    // Check for thread interrupt status in case we have been signaled from
    // #interruptRegionOperation.
    try {
        // STEP 1. Try to acquire as many locks as we can and build mini-batch of operations with
        // locked rows
        miniBatchOp = batchOp.lockRowsAndBuildMiniBatch(acquiredRowLocks);
        // Ensure we acquire at least one.
        if (miniBatchOp.getReadyToWriteCount() <= 0) {
            // NoSuchColumnFamily?
        // Check for thread interrupt status in case we have been signaled from
        // #interruptRegionOperation. Do it before we take the lock and disable interrupts for
        // the WAL append.
        lock(this.updatesLock.readLock(), miniBatchOp.getReadyToWriteCount());
        locked = true;
        // From this point until memstore update this operation should not be interrupted.
        // STEP 2. Update mini batch of all operations in progress with LATEST_TIMESTAMP timestamp
        // We should record the timestamp only after we have acquired the rowLock,
        // otherwise, newer puts/deletes/increment/append are not guaranteed to have a newer
        // timestamp
        long now = EnvironmentEdgeManager.currentTime();
        batchOp.prepareMiniBatchOperations(miniBatchOp, now, acquiredRowLocks);
        // STEP 3. Build WAL edit
        List<Pair<NonceKey, WALEdit>> walEdits = batchOp.buildWALEdits(miniBatchOp);
        for (Iterator<Pair<NonceKey, WALEdit>> it = walEdits.iterator(); it.hasNext(); ) {
            // NOTE(review): the right-hand side of this assignment is missing in the excerpt.
            Pair<NonceKey, WALEdit> nonceKeyWALEditPair =;
            walEdit = nonceKeyWALEditPair.getSecond();
            NonceKey nonceKey = nonceKeyWALEditPair.getFirst();
            // Only non-empty edits are appended to the WAL; the append yields the write entry.
            if (walEdit != null && !walEdit.isEmpty()) {
                writeEntry = doWALAppend(walEdit, batchOp.durability, batchOp.getClusterIds(), now, nonceKey.getNonceGroup(), nonceKey.getNonce(), batchOp.getOrigLogSeqNum());
            // Complete mvcc for all but last writeEntry (for replay case)
            if (it.hasNext() && writeEntry != null) {
                writeEntry = null;
        // NOTE(review): no "STEP 4" comment is visible in this excerpt.
        // STEP 5. Write back to memStore
        // NOTE: writeEntry can be null here
        writeEntry = batchOp.writeMiniBatchOperationsToMemStore(miniBatchOp, writeEntry);
        // STEP 6. Complete MiniBatchOperations: If required calls postBatchMutate() CP hook and
        // complete mvcc for last writeEntry
        batchOp.completeMiniBatchOperations(miniBatchOp, writeEntry);
        // Clear the reference so the finally clause does not treat the entry as still pending.
        writeEntry = null;
        success = true;
    } finally {
        // Call complete rather than completeAndWait because we probably had error if walKey != null
        // NOTE(review): the condition below tests writeEntry; "walKey" is not a variable in the
        // visible code.
        if (writeEntry != null)
        if (locked) {
        // If no mini-batch was built, the whole batch size is used as the exclusive end index.
        final int finalLastIndexExclusive = miniBatchOp != null ? miniBatchOp.getLastIndexExclusive() : batchOp.size();
        final boolean finalSuccess = success;
        // Record a per-operation status: Increment/Append successes carry batchOp.results[i],
        // every other case uses the shared SUCCESS/FAILURE constants.
        batchOp.visitBatchOperations(true, finalLastIndexExclusive, (int i) -> {
            Mutation mutation = batchOp.getMutation(i);
            if (mutation instanceof Increment || mutation instanceof Append) {
                if (finalSuccess) {
                    batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.SUCCESS, batchOp.results[i]);
                } else {
                    batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
            } else {
                batchOp.retCodeDetails[i] = finalSuccess ? OperationStatus.SUCCESS : OperationStatus.FAILURE;
            return true;
        batchOp.doPostOpCleanupForMiniBatch(miniBatchOp, walEdit, finalSuccess);
        // Advance the batch cursor past the range handled by this call.
        batchOp.nextIndexToProcess = finalLastIndexExclusive;
Also used : WriteEntry(org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry) Append(org.apache.hadoop.hbase.client.Append) WALEdit(org.apache.hadoop.hbase.wal.WALEdit) NonceKey(org.apache.hadoop.hbase.util.NonceKey) Increment(org.apache.hadoop.hbase.client.Increment) Mutation(org.apache.hadoop.hbase.client.Mutation) Pair(org.apache.hadoop.hbase.util.Pair)


