use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method verifyWorkerCanReregister.
/**
 * Sends the given register stream to the master and verifies that the worker ends up
 * registered with the expected blocks.
 *
 * <p>The checks are: the stream produced no errors, the worker reports
 * {@code expectedBlockCount} blocks, it has no blocks pending removal, and the master
 * sees exactly one worker.
 *
 * @param workerId the ID used to look the worker up on the master after the stream is sent
 * @param requestChunks the register stream requests to send to the master
 * @param expectedBlockCount the block count the worker is expected to have afterwards
 */
private void verifyWorkerCanReregister(
    long workerId, List<RegisterWorkerPRequest> requestChunks, int expectedBlockCount)
    throws Exception {
  // Collect any error the master reports back on the response stream
  Queue<Throwable> capturedErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(
      requestChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(capturedErrors));
  assertEquals(0, capturedErrors.size());

  // Inspect the master-side metadata of the worker
  MasterWorkerInfo registeredWorker = mBlockMaster.getWorker(workerId);
  assertEquals(expectedBlockCount, registeredWorker.getBlockCount());
  assertEquals(0, registeredWorker.getToRemoveBlockCount());
  assertEquals(1, mBlockMaster.getWorkerCount());
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method registerLostWorker.
/**
 * The master has marked the worker as lost.
 *
 * <p>Registers a worker, advances the clock past the worker timeout so the lost-worker
 * detector forgets it, then registers the same stream again and verifies the worker is
 * back with all of its blocks.
 */
@Test
public void registerLostWorker() throws Exception {
  long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);
  // The worker registers to the master
  List<RegisterWorkerPRequest> requestChunks =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
  prepareBlocksOnMaster(requestChunks);
  Queue<Throwable> errorQueue = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(requestChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue));
  // Verify the worker has been registered
  assertEquals(0, errorQueue.size());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // The worker has lost heartbeat and been forgotten:
  // move the clock just past the timeout and run the lost-worker detection once
  MasterWorkerInfo worker = mBlockMaster.getWorker(workerId);
  long newTimeMs = worker.getLastUpdatedTimeMs() + MASTER_WORKER_TIMEOUT + 1;
  mClock.setTimeMs(newTimeMs);
  DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor lostWorkerDetector =
      ((DefaultBlockMaster) mBlockMaster).new LostWorkerDetectionHeartbeatExecutor();
  lostWorkerDetector.heartbeat();
  // Verify the worker has been forgotten
  assertEquals(0, mBlockMaster.getWorkerCount());

  // Register again
  Queue<Throwable> newErrorQueue = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(requestChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue));
  // Verify the worker is registered again.
  // The errors of the second stream land in newErrorQueue; the first queue was
  // already checked above and cannot receive anything from this stream.
  assertEquals(0, newErrorQueue.size());
  MasterWorkerInfo updatedWorker = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL, updatedWorker.getBlockCount());
  assertEquals(0, updatedWorker.getToRemoveBlockCount());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // Verify the worker is readable and writable
  verifyWorkerWritable(workerId);
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method reregisterWithDelete.
/**
 * Tests below cover the race conditions during concurrent executions.
 *
 * When the worker registers for the 1st time, no clients should know this worker.
 * Therefore there are no concurrent client-incurred write operations on this worker.
 * The races typically happen when the worker re-registers with the master,
 * where some clients already know this worker and can directly invoke writes on the worker.
 *
 * Tests here verify the integrity of the master-side metadata.
 * In other words, we assume those writes succeed on the worker, and the subsequent
 * update on the master-side metadata should also succeed and be correct.
 *
 * This test re-registers a worker on a background thread and removes one of its blocks
 * from the master while the register stream is in flight.
 */
@Test
public void reregisterWithDelete() throws Exception {
  // Register the worker so the worker is marked active in master
  long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);
  List<RegisterWorkerPRequest> requestChunks =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
  prepareBlocksOnMaster(requestChunks);
  Queue<Throwable> errorQueue = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(requestChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue));
  assertEquals(0, errorQueue.size());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // Find a block to remove
  long blockToRemove = RegisterStreamTestUtils.findFirstBlock(requestChunks);

  // Register again, on another thread so this thread can race against the stream
  CountDownLatch latch = new CountDownLatch(1);
  // NOTE(review): errors captured by the second stream are not asserted on in this test;
  // confirm whether the concurrent delete is expected to leave this queue empty.
  Queue<Throwable> newErrorQueue = new ConcurrentLinkedQueue<>();
  Future<?> f = mExecutorService.submit(() -> {
    sendStreamToMasterAndSignal(requestChunks,
        RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue), latch);
  });

  // During the register stream, trigger a delete on worker
  latch.await();
  mBlockMaster.removeBlocks(ImmutableList.of(blockToRemove), true);

  // Wait for the register to finish
  f.get();

  // The removed block is no longer known to the master
  assertThrows(BlockInfoException.class, () -> {
    mBlockMaster.getBlockInfo(blockToRemove);
  });
  MasterWorkerInfo worker = mBlockMaster.getWorker(workerId);
  assertEquals(1, mBlockMaster.getWorkerCount());
  assertEquals(TIER_BLOCK_TOTAL - 1, worker.getBlockCount());

  // BlockMaster.removeBlocks() will first remove the block from master metadata
  // (with block lock) then update the block locations (with worker lock).
  // The worker lock is being held by the registering worker, but the 1st part
  // will likely succeed.
  // So during registration when checking on the block, the block is not recognized
  // any more and will remain in MasterWorkerInfo.mToRemoveBlocks.
  // In the next heartbeat the master will issue a command to remove the block
  // from the worker.
  // Even if the block is already removed on the worker it is fine,
  // because deletion of a not-found block is a noop.
  Command command = sendHeartbeatToMaster(workerId);
  assertEquals(
      Command.newBuilder().addData(blockToRemove).setCommandType(CommandType.Free).build(),
      command);
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorkerBlocksLost.
/**
 * Registers a worker, then registers it a second time with a stream that is missing
 * some of the blocks it reported before.
 *
 * <p>After the second registration the worker should report only the remaining blocks,
 * the missing blocks should be counted as to-be-removed, and the next heartbeat should
 * return a {@code Free} command listing exactly those blocks.
 */
@Test
public void registerExistingWorkerBlocksLost() throws Exception {
  long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);

  // First registration with the full set of blocks
  List<RegisterWorkerPRequest> initialChunks =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
  prepareBlocksOnMaster(initialChunks);
  Queue<Throwable> initialErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(
      initialChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(initialErrors));
  assertEquals(0, initialErrors.size());

  // The worker is known to the master and holds every block
  assertEquals(1, mBlockMaster.getWorkerCount());
  MasterWorkerInfo initialWorker = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL, initialWorker.getBlockCount());
  assertEquals(0, initialWorker.getToRemoveBlockCount());

  // Generate the block list again by hand and drop some of the blocks from it
  List<String> tierAliases = getTierAliases(parseTierConfig(TIER_CONFIG));
  Map<BlockStoreLocation, List<Long>> blocksByLocation =
      RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(TIER_CONFIG));
  Set<Long> lostBlocks = removeSomeBlocks(blocksByLocation);

  // Build a new register stream from the reduced block list
  RegisterStreamer reducedStreamer =
      new RegisterStreamer(
          null,
          workerId,
          tierAliases,
          CAPACITY_MAP,
          USAGE_MAP,
          blocksByLocation,
          LOST_STORAGE,
          EMPTY_CONFIG);
  List<RegisterWorkerPRequest> reducedChunks = ImmutableList.copyOf(reducedStreamer);
  // Fewer blocks may need fewer batches; the stream should be sized accordingly
  int expectedBatches =
      (int) Math.ceil((TIER_BLOCK_TOTAL - lostBlocks.size()) / (double) BATCH_SIZE);
  assertEquals(expectedBatches, reducedChunks.size());

  // Second registration with the reduced stream
  Queue<Throwable> reregisterErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(
      reducedChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(reregisterErrors));
  assertEquals(0, reregisterErrors.size());

  // The worker is still registered, now with only the remaining blocks
  assertEquals(1, mBlockMaster.getWorkerCount());
  MasterWorkerInfo reregisteredWorker = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL - lostBlocks.size(), reregisteredWorker.getBlockCount());
  // The master will mark the lost blocks as to be removed.
  // This is to ensure the unrecognized blocks do not live on the worker anymore.
  assertEquals(lostBlocks.size(), reregisteredWorker.getToRemoveBlockCount());

  // The next heartbeat carries a Free command covering exactly the lost blocks
  Command heartbeatCommand = sendHeartbeatToMaster(workerId);
  assertEquals(CommandType.Free, heartbeatCommand.getCommandType());
  assertEquals(lostBlocks, new HashSet<>(heartbeatCommand.getDataList()));

  // Verify the worker is readable and writable
  verifyWorkerWritable(workerId);
}
Aggregations