Search in sources :

Example 26 with MasterWorkerInfo

use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.

the class DefaultBlockMaster method workerHeartbeat.

/**
 * Processes a heartbeat sent by a worker and decides which command to send back.
 *
 * <p>If no worker with the given id is known, a {@code Register} command is returned.
 * Otherwise the worker's storage usage and block lists are updated under the worker's
 * USAGE and BLOCKS metadata locks, and the reply is either {@code Nothing} or a
 * {@code Free} command carrying the ids of the blocks the worker should remove.
 *
 * @param workerId the id of the worker sending the heartbeat
 * @param capacityBytesOnTiers capacity per tier alias; skipped when {@code null}
 * @param usedBytesOnTiers used bytes per tier alias
 * @param removedBlockIds ids of blocks the worker reports as removed
 * @param addedBlocks ids of blocks the worker reports as added, keyed by location
 * @param lostStorage storage the worker reports as lost
 * @param metrics metrics reported by the worker
 * @return the command for the worker to execute
 */
@Override
public Command workerHeartbeat(long workerId, Map<String, Long> capacityBytesOnTiers, Map<String, Long> usedBytesOnTiers, List<Long> removedBlockIds, Map<BlockLocation, List<Long>> addedBlocks, Map<String, StorageList> lostStorage, List<Metric> metrics) {
    MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (workerInfo == null) {
        // Unknown worker: ask it to register.
        LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
        return Command.newBuilder().setCommandType(CommandType.Register).build();
    }

    // Refresh the timestamp up front so that slow heartbeat processing (or a GC pause)
    // does not let the LostWorkerDetectionHeartbeatExecutor mark the worker as lost.
    workerInfo.updateLastUpdatedTimeMs();

    // The worker address is final, so it is read without taking a lock.
    processWorkerMetrics(workerInfo.getWorkerAddress().getHost(), metrics);

    EnumSet<WorkerMetaLockSection> lockedSections =
        EnumSet.of(WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS);
    Command workerCommand = null;
    try (LockResource metaLock = workerInfo.lockWorkerMeta(lockedSections, false)) {
        workerInfo.addLostStorage(lostStorage);
        if (capacityBytesOnTiers != null) {
            workerInfo.updateCapacityBytes(capacityBytesOnTiers);
        }
        workerInfo.updateUsedBytes(usedBytesOnTiers);

        // Strictly speaking the worker should be re-checked for membership here, because
        // lost worker detection may have removed it. The race is deliberately ignored:
        // the worker will simply re-register.
        processWorkerRemovedBlocks(workerInfo, removedBlockIds, false);
        processWorkerAddedBlocks(workerInfo, addedBlocks);

        Set<Long> pendingRemovals = workerInfo.getToRemoveBlocks();
        workerCommand = pendingRemovals.isEmpty()
            ? Command.newBuilder().setCommandType(CommandType.Nothing).build()
            : Command.newBuilder().setCommandType(CommandType.Free).addAllData(pendingRemovals).build();
    }

    // Refresh the timestamp once more now that processing is done.
    workerInfo.updateLastUpdatedTimeMs();

    // Defensive check; the command is always assigned inside the locked section.
    Preconditions.checkNotNull(workerCommand, "Worker heartbeat response command is null!");
    return workerCommand;
}
Also used : LockResource(alluxio.resource.LockResource) Command(alluxio.grpc.Command) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo)

Example 27 with MasterWorkerInfo

use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.

the class DefaultBlockMaster method removeBlocks.

/**
 * Marks the given blocks for removal on every worker that currently holds them and,
 * when {@code delete} is set, also deletes the block metadata and journals the deletion.
 *
 * <p>Blocks with no metadata in the block store are skipped.
 *
 * @param blockIds ids of the blocks to remove
 * @param delete whether the block metadata should be deleted as well
 * @throws UnavailableException declared by this method; NOTE(review): presumably raised
 *         when the journal context cannot be created — confirm
 */
@Override
public void removeBlocks(Collection<Long> blockIds, boolean delete) throws UnavailableException {
    try (JournalContext journal = createJournalContext()) {
        for (long blockId : blockIds) {
            Set<Long> holderIds;
            try (LockResource blockLock = lockBlock(blockId)) {
                if (!mBlockStore.getBlock(blockId).isPresent()) {
                    // No metadata for this block: nothing to do.
                    continue;
                }
                // Snapshot the ids of the workers holding the block while the block is locked.
                List<BlockLocation> blockLocations = mBlockStore.getLocations(blockId);
                holderIds = new HashSet<>(blockLocations.size());
                for (BlockLocation location : blockLocations) {
                    holderIds.add(location.getWorkerId());
                }
                if (delete) {
                    // Drop blockId from mLostBlocks together with the metadata; otherwise
                    // mLostBlocks would keep a dangling id for metadata that no longer exists.
                    mLostBlocks.remove(blockId);
                    mBlockStore.removeBlock(blockId);
                    journal.append(JournalEntry.newBuilder()
                        .setDeleteBlock(DeleteBlockEntry.newBuilder().setBlockId(blockId))
                        .build());
                }
            }
            // The block lock has been released at this point; each holder is now told,
            // under its own BLOCKS metadata lock, to remove the block.
            for (long holderId : holderIds) {
                MasterWorkerInfo holder = mWorkers.getFirstByField(ID_INDEX, holderId);
                if (holder == null) {
                    continue;
                }
                try (LockResource metaLock = holder.lockWorkerMeta(EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) {
                    holder.updateToRemovedBlock(true, blockId);
                }
            }
        }
    }
}
Also used : LockResource(alluxio.resource.LockResource) JournalContext(alluxio.master.journal.JournalContext) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) BlockLocation(alluxio.proto.meta.Block.BlockLocation) BlockMeta(alluxio.proto.meta.Block.BlockMeta) JournalEntry(alluxio.proto.journal.Journal.JournalEntry)

Example 28 with MasterWorkerInfo

use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.

the class DefaultBlockMaster method workerRegisterFinish.

/**
 * Completes a worker registration: processes the blocks to remove from the worker,
 * marks the worker as registered, records the registration, refreshes its timestamp,
 * invalidates the worker info cache and drops the active register context.
 *
 * @param context the register context; its {@code mWorker} must not be {@code null}
 */
@Override
public void workerRegisterFinish(WorkerRegisterContext context) {
    MasterWorkerInfo registrant = context.mWorker;
    Preconditions.checkState(registrant != null, "No worker metadata found in the WorkerRegisterContext!");

    // A first-time registration has nothing to remove.
    Set<Long> blocksToRemove = Collections.emptySet();
    if (registrant.mIsRegistered) {
        // Re-registration of a known worker: the incoming block ownership data is assumed
        // to be the more recent one and replaces what the master had.
        LOG.info("re-registering an existing workerId: {}", registrant.getId());
        // Once all blocks have been processed, the toRemoveBlocks field holds every update.
        blocksToRemove = registrant.getToRemoveBlocks();
    }
    LOG.info("Found {} blocks to remove from the worker", blocksToRemove.size());
    processWorkerRemovedBlocks(registrant, blocksToRemove, true);

    // Flag the registration as successful and record it.
    registrant.mIsRegistered = true;
    recordWorkerRegistration(registrant.getId());

    // The timestamp is refreshed last, after all processing has finished.
    registrant.updateLastUpdatedTimeMs();

    // Invalidating the cache forces the worker info list to be rebuilt.
    mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY);
    LOG.info("Worker successfully registered: {}", registrant);
    mActiveRegisterContexts.remove(registrant.getId());
}
Also used : MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo)

Example 29 with MasterWorkerInfo

use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.

the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorker.

/**
 * Registers the same worker twice with an identical request stream and checks that the
 * second registration succeeds without changing the worker's blocks.
 * This can happen when a worker process is restarted.
 */
@Test
public void registerExistingWorker() throws Exception {
    long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);
    List<RegisterWorkerPRequest> registerStream = RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
    prepareBlocksOnMaster(registerStream);

    // First registration
    Queue<Throwable> firstErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(registerStream, RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstErrors));
    assertEquals(0, firstErrors.size());
    // The worker is now known to the master
    assertEquals(1, mBlockMaster.getWorkerCount());

    // Second registration with the very same stream
    Queue<Throwable> secondErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(registerStream, RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondErrors));
    assertEquals(0, secondErrors.size());

    // The worker is still registered, with the same blocks and nothing to remove
    MasterWorkerInfo workerAfter = mBlockMaster.getWorker(workerId);
    assertEquals(TIER_BLOCK_TOTAL, workerAfter.getBlockCount());
    assertEquals(0, workerAfter.getToRemoveBlockCount());
    assertEquals(1, mBlockMaster.getWorkerCount());

    // The worker can still be read from and written to
    verifyWorkerWritable(workerId);
}
Also used : MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 30 with MasterWorkerInfo

use of alluxio.master.block.meta.MasterWorkerInfo in project alluxio by Alluxio.

the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorkerBlocksAdded.

/**
 * Registers a worker, then re-registers it with a request stream that reports extra
 * blocks, and checks that the master picks up the added blocks during registration.
 */
@Test
public void registerExistingWorkerBlocksAdded() throws Exception {
    long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);

    // Initial registration
    List<RegisterWorkerPRequest> initialStream = RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId);
    prepareBlocksOnMaster(initialStream);
    Queue<Throwable> initialErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(initialStream, RegisterStreamTestUtils.getErrorCapturingResponseObserver(initialErrors));
    assertEquals(0, initialErrors.size());

    // The worker is registered with the expected blocks and nothing to remove
    assertEquals(1, mBlockMaster.getWorkerCount());
    MasterWorkerInfo workerBefore = mBlockMaster.getWorker(workerId);
    assertEquals(TIER_BLOCK_TOTAL, workerBefore.getBlockCount());
    assertEquals(0, workerBefore.getToRemoveBlockCount());

    // Build the block ids the same way as before, then add a few extra ones
    Map<BlockStoreLocation, List<Long>> blocksByLocation = RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(TIER_CONFIG));
    Set<Long> addedBlocks = addSomeBlocks(blocksByLocation);
    // The master must know about the extra blocks to accept them
    prepareBlocksOnMaster(addedBlocks);

    // Build a fresh request stream that includes the extra blocks
    List<String> tierAliases = getTierAliases(parseTierConfig(TIER_CONFIG));
    Map<String, Long> capacityByTier = Maps.toMap(tierAliases, tier -> CAPACITY);
    Map<String, Long> usedByTier = Maps.toMap(tierAliases, tier -> 0L);
    RegisterStreamer streamer = new RegisterStreamer(null, workerId, tierAliases, capacityByTier, usedByTier, blocksByLocation, LOST_STORAGE, EMPTY_CONFIG);
    List<RegisterWorkerPRequest> updatedStream = ImmutableList.copyOf(streamer);
    int expectedBatches = (int) Math.ceil((TIER_BLOCK_TOTAL + addedBlocks.size()) / (double) BATCH_SIZE);
    assertEquals(expectedBatches, updatedStream.size());

    // Second registration, this time with the updated stream
    Queue<Throwable> updatedErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(updatedStream, RegisterStreamTestUtils.getErrorCapturingResponseObserver(updatedErrors));
    assertEquals(0, updatedErrors.size());

    // The worker is still registered and now owns the extra blocks
    assertEquals(1, mBlockMaster.getWorkerCount());
    MasterWorkerInfo workerAfter = mBlockMaster.getWorker(workerId);
    assertEquals(TIER_BLOCK_TOTAL + addedBlocks.size(), workerAfter.getBlockCount());
    assertEquals(0, workerAfter.getToRemoveBlockCount());

    // The update arrived during registration, so the heartbeat carries no command
    assertEquals(EMPTY_CMD, sendHeartbeatToMaster(workerId));

    // The worker can still be read from and written to
    verifyWorkerWritable(workerId);
}
Also used : RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) RegisterStreamer(alluxio.worker.block.RegisterStreamer) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) StorageList(alluxio.grpc.StorageList) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) BlockStoreLocation(alluxio.worker.block.BlockStoreLocation) Test(org.junit.Test)

Aggregations

MasterWorkerInfo (alluxio.master.block.meta.MasterWorkerInfo)34 RegisterWorkerPRequest (alluxio.grpc.RegisterWorkerPRequest)10 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)9 LockResource (alluxio.resource.LockResource)8 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 StorageList (alluxio.grpc.StorageList)7 List (java.util.List)7 ConcurrentHashSet (alluxio.collections.ConcurrentHashSet)6 Command (alluxio.grpc.Command)6 HashMap (java.util.HashMap)6 HashSet (java.util.HashSet)6 Map (java.util.Map)6 BlockLocation (alluxio.proto.meta.Block.BlockLocation)5 WorkerInfo (alluxio.wire.WorkerInfo)5 NotFoundException (alluxio.exception.status.NotFoundException)4 UnavailableException (alluxio.exception.status.UnavailableException)4 Address (alluxio.wire.Address)4 BlockInfo (alluxio.wire.BlockInfo)4 WorkerNetAddress (alluxio.wire.WorkerNetAddress)4