use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class DefaultBlockMaster method workerHeartbeat.
/**
 * Handles a heartbeat from a worker and replies with the command the worker should act on.
 *
 * <p>A worker id that is not present in {@code mWorkers} is answered with a {@code Register}
 * command. Otherwise the reported lost storage, capacity, usage and block changes are applied
 * to the worker metadata while holding its {@code USAGE} and {@code BLOCKS} lock sections. The
 * reply is {@code Free} carrying the worker's to-remove block ids, or {@code Nothing} when
 * that set is empty.
 *
 * @param workerId id of the worker sending the heartbeat
 * @param capacityBytesOnTiers reported capacity bytes; the capacity update is skipped when
 *        this is {@code null}
 * @param usedBytesOnTiers reported used bytes
 * @param removedBlockIds ids of the blocks the worker reports as removed
 * @param addedBlocks ids of the blocks the worker reports as added, keyed by block location
 * @param lostStorage lost storage reported by the worker
 * @param metrics metrics reported by the worker
 * @return the command for the worker
 */
@Override
public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers,
    Map<String, Long> usedBytesOnTiers, List<Long> removedBlockIds,
    Map<BlockLocation, List<Long>> addedBlocks, Map<String, StorageList> lostStorage,
    List<Metric> metrics) {
  MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, workerId);
  if (null == workerInfo) {
    // Unknown worker: ask it to register.
    LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
    return Command.newBuilder().setCommandType(CommandType.Register).build();
  }

  // Refresh the timestamp before doing any work. If processing this heartbeat is slow or
  // triggers GC, the LostWorkerDetectionHeartbeatExecutor must not mark the worker as lost.
  workerInfo.updateLastUpdatedTimeMs();

  // The worker address is final, so it is read without taking a lock.
  processWorkerMetrics(workerInfo.getWorkerAddress().getHost(), metrics);

  EnumSet<WorkerMetaLockSection> lockedSections =
      EnumSet.of(WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS);
  Command response = null;
  try (LockResource metaLock = workerInfo.lockWorkerMeta(lockedSections, false)) {
    workerInfo.addLostStorage(lostStorage);
    if (capacityBytesOnTiers != null) {
      workerInfo.updateCapacityBytes(capacityBytesOnTiers);
    }
    workerInfo.updateUsedBytes(usedBytesOnTiers);

    // Strictly speaking, the worker should be re-checked for membership in mWorkers here,
    // because lost worker detection may have removed it. That race is intentionally ignored:
    // the worker will simply re-register.
    processWorkerRemovedBlocks(workerInfo, removedBlockIds, false);
    processWorkerAddedBlocks(workerInfo, addedBlocks);

    Set<Long> pendingRemovals = workerInfo.getToRemoveBlocks();
    response = pendingRemovals.isEmpty()
        ? Command.newBuilder().setCommandType(CommandType.Nothing).build()
        : Command.newBuilder().setCommandType(CommandType.Free)
            .addAllData(pendingRemovals).build();
  }

  // Refresh the timestamp once more now that processing is done.
  workerInfo.updateLastUpdatedTimeMs();

  // Defensive check: both branches above assign a command.
  Preconditions.checkNotNull(response, "Worker heartbeat response command is null!");
  return response;
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class DefaultBlockMaster method removeBlocks.
/**
 * Removes the given blocks from the workers that hold them and, optionally, from the master's
 * block metadata.
 *
 * <p>For each block id that exists in the block store, the ids of the workers holding the
 * block are collected while the block lock is held. When {@code delete} is {@code true} the
 * block is also dropped from {@code mLostBlocks} and the block store, and a
 * {@code DeleteBlockEntry} is journaled. After the block lock is released,
 * {@code updateToRemovedBlock(true, blockId)} is called on every collected worker that is
 * still present in {@code mWorkers}.
 *
 * @param blockIds ids of the blocks to remove
 * @param delete whether the block metadata should be deleted as well
 * @throws UnavailableException declared by the signature; presumably raised when the journal
 *         context cannot be created (TODO confirm)
 */
@Override
public void removeBlocks(Collection<Long> blockIds, boolean delete) throws UnavailableException {
  try (JournalContext journalContext = createJournalContext()) {
    for (long blockId : blockIds) {
      Set<Long> holderIds;
      try (LockResource blockLock = lockBlock(blockId)) {
        // Blocks unknown to the block store are skipped.
        if (!mBlockStore.getBlock(blockId).isPresent()) {
          continue;
        }

        List<BlockLocation> blockLocations = mBlockStore.getLocations(blockId);
        holderIds = new HashSet<>(blockLocations.size());
        for (BlockLocation location : blockLocations) {
          holderIds.add(location.getWorkerId());
        }

        if (delete) {
          // Drop the id from mLostBlocks together with the metadata; otherwise mLostBlocks
          // would keep a dangling id for a block whose metadata is gone.
          mLostBlocks.remove(blockId);
          mBlockStore.removeBlock(blockId);
          journalContext.append(JournalEntry.newBuilder()
              .setDeleteBlock(DeleteBlockEntry.newBuilder().setBlockId(blockId))
              .build());
        }
      }

      // The block lock is released at this point; the workers are updated outside of it.
      // NOTE(review): the original comment here mentioned a race condition to be addressed in
      // workerRegister; the details are not visible in this excerpt.
      for (long holderId : holderIds) {
        MasterWorkerInfo holder = mWorkers.getFirstByField(ID_INDEX, holderId);
        if (holder == null) {
          continue;
        }
        try (LockResource metaLock =
            holder.lockWorkerMeta(EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
          holder.updateToRemovedBlock(true, blockId);
        }
      }
    }
  }
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class DefaultBlockMaster method workerRegisterFinish.
/**
 * Completes the registration of the worker carried by the given register context.
 *
 * <p>When the worker was already registered, the blocks in its to-remove set are processed as
 * removed blocks; a first-time registration processes an empty set. The worker is then marked
 * as registered, its registration is recorded, its last-updated timestamp is refreshed, the
 * cached worker info list is invalidated, and its register context is dropped from
 * {@code mActiveRegisterContexts}.
 *
 * @param context the register context; its {@code mWorker} must not be {@code null}
 */
@Override
public void workerRegisterFinish(WorkerRegisterContext context) {
  MasterWorkerInfo workerInfo = context.mWorker;
  Preconditions.checkState(workerInfo != null,
      "No worker metadata found in the WorkerRegisterContext!");

  // Work out which blocks have to be removed from this worker.
  Set<Long> staleBlocks;
  if (!workerInfo.mIsRegistered) {
    // First registration: nothing to remove.
    staleBlocks = Collections.emptySet();
  } else {
    // Re-registration of a known worker: the newly reported block ownership is assumed to be
    // more up-to-date, so the existing block information is updated from it.
    LOG.info("re-registering an existing workerId: {}", workerInfo.getId());
    // Once all blocks have been processed, toRemoveBlocks holds every pending update.
    staleBlocks = workerInfo.getToRemoveBlocks();
  }
  LOG.info("Found {} blocks to remove from the worker", staleBlocks.size());
  processWorkerRemovedBlocks(workerInfo, staleBlocks, true);

  // Registration succeeded.
  workerInfo.mIsRegistered = true;
  recordWorkerRegistration(workerInfo.getId());

  // The timestamp is refreshed only at the very end of the process.
  workerInfo.updateLastUpdatedTimeMs();

  // Invalidate the cache so that the worker info list is rebuilt.
  mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY);
  LOG.info("Worker successfully registered: {}", workerInfo);
  mActiveRegisterContexts.remove(workerInfo.getId());
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorker.
/**
 * Sends the same register stream twice for one worker id and checks that the second
 * registration succeeds and leaves the worker with the expected blocks.
 *
 * <p>This can happen when a worker process is restarted.
 */
@Test
public void registerExistingWorker() throws Exception {
  long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);
  List<RegisterWorkerPRequest> registerChunks =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
  prepareBlocksOnMaster(registerChunks);

  // First registration
  Queue<Throwable> firstErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(registerChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstErrors));
  assertEquals(0, firstErrors.size());
  // The worker is now registered
  assertEquals(1, mBlockMaster.getWorkerCount());

  // Second registration with the identical stream
  Queue<Throwable> secondErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(registerChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondErrors));
  assertEquals(0, secondErrors.size());

  // The worker is still registered, with the same blocks and nothing to remove
  MasterWorkerInfo reRegistered = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL, reRegistered.getBlockCount());
  assertEquals(0, reRegistered.getToRemoveBlockCount());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // The worker can be read from and written to
  verifyWorkerWritable(workerId);
}
use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorkerBlocksAdded.
/**
 * Registers a worker, then registers it again with a stream that reports additional blocks,
 * and checks that the master ends up with the enlarged block set and nothing to remove.
 */
@Test
public void registerExistingWorkerBlocksAdded() throws Exception {
  long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);

  // First registration
  List<RegisterWorkerPRequest> initialChunks =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
  prepareBlocksOnMaster(initialChunks);
  Queue<Throwable> initialErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(initialChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(initialErrors));
  assertEquals(0, initialErrors.size());

  // The worker is registered with the initial block set
  assertEquals(1, mBlockMaster.getWorkerCount());
  MasterWorkerInfo registered = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL, registered.getBlockCount());
  assertEquals(0, registered.getToRemoveBlockCount());

  // Generate the block ids the same way as before, then add a few extra ones
  Map<BlockStoreLocation, List<Long>> blocksByLocation =
      RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(TIER_CONFIG));
  Set<Long> extraBlocks = addSomeBlocks(blocksByLocation);
  // Make the master accept the extra blocks
  prepareBlocksOnMaster(extraBlocks);

  // Build a fresh register stream that covers the enlarged block set
  List<String> tiers = getTierAliases(parseTierConfig(TIER_CONFIG));
  Map<String, Long> capacityByTier = Maps.toMap(tiers, tier -> CAPACITY);
  Map<String, Long> usedByTier = Maps.toMap(tiers, tier -> 0L);
  RegisterStreamer streamer = new RegisterStreamer(null, workerId, tiers, capacityByTier,
      usedByTier, blocksByLocation, LOST_STORAGE, EMPTY_CONFIG);
  List<RegisterWorkerPRequest> enlargedChunks = ImmutableList.copyOf(streamer);
  double exactBatches = (TIER_BLOCK_TOTAL + extraBlocks.size()) / (double) BATCH_SIZE;
  int expectedBatches = (int) Math.ceil(exactBatches);
  assertEquals(expectedBatches, enlargedChunks.size());

  // Second registration, using the new stream
  Queue<Throwable> reRegisterErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(enlargedChunks,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(reRegisterErrors));
  assertEquals(0, reRegisterErrors.size());

  // The worker is still registered and now owns the extra blocks too
  assertEquals(1, mBlockMaster.getWorkerCount());
  MasterWorkerInfo reRegistered = mBlockMaster.getWorker(workerId);
  assertEquals(TIER_BLOCK_TOTAL + extraBlocks.size(), reRegistered.getBlockCount());
  assertEquals(0, reRegistered.getToRemoveBlockCount());

  // The update arrived during registration, so the heartbeat carries no command
  assertEquals(EMPTY_CMD, sendHeartbeatToMaster(workerId));

  // The worker can be read from and written to
  verifyWorkerWritable(workerId);
}
Aggregations