Search in sources :

Example 21 with RegisterWorkerPRequest

use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.

the class RegisterStreamTestUtils method generateRegisterStreamForEmptyWorker.

/**
 * Builds the register stream for a worker that reports no used space, using an empty tier
 * config to generate the block map.
 *
 * @param workerId the worker ID handed to the {@code RegisterStreamer}
 * @return every request produced by the streamer; asserted to be exactly one
 */
public static List<RegisterWorkerPRequest> generateRegisterStreamForEmptyWorker(long workerId) {
    // An empty tier config is fed to the heuristic block ID generator
    final String emptyTierConfig = "";
    Map<BlockStoreLocation, List<Long>> blocksByLocation =
            RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(emptyTierConfig));
    RegisterStreamer streamer = new RegisterStreamer(
            null,
            workerId,
            ImmutableList.of("MEM"),
            MEM_CAPACITY,
            MEM_USAGE_EMPTY,
            blocksByLocation,
            LOST_STORAGE,
            EMPTY_CONFIG);
    // Collect everything the streamer yields; an empty worker needs a single request only
    List<RegisterWorkerPRequest> chunks = ImmutableList.copyOf(streamer);
    assertEquals(1, chunks.size());
    return chunks;
}
Also used : List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) BlockStoreLocation(alluxio.worker.block.BlockStoreLocation) RegisterStreamer(alluxio.worker.block.RegisterStreamer)

Example 22 with RegisterWorkerPRequest

use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.

the class RegisterStreamer method registerInternal.

/**
 * Sends every remaining request of this stream to the master, then completes the client side
 * and waits for the master to finish the registration.
 *
 * <p>Before each send, one token is acquired from {@code mBucket} with a timeout of
 * {@code mResponseTimeoutMs}. After the last send the method waits on {@code mAckLatch},
 * calls {@code onCompleted()} on the request observer, waits on {@code mFinishLatch} and
 * finally inspects {@code mError}.
 *
 * @throws InterruptedException declared for the blocking waits on the bucket and the latches
 * @throws DeadlineExceededException if a token, the ACK latch or the finish latch is not
 *         obtained within its timeout
 * @throws CancelledException not thrown directly in this method; presumably raised by
 *         {@code abort()} -- confirm against its definition
 * @throws InternalException if {@code mError} holds a throwable once the finish latch is released
 */
private void registerInternal() throws InterruptedException, DeadlineExceededException, CancelledException, InternalException {
    // Batch counter, used for debug logging only
    int iter = 0;
    while (hasNext()) {
        // Send a request when the master ACKs the previous one
        LOG.debug("Worker {} - Acquiring one token to send the next batch", mWorkerId);
        Instant start = Instant.now();
        // Give up if no token becomes available within the response timeout
        if (!mBucket.tryAcquire(mResponseTimeoutMs, TimeUnit.MILLISECONDS)) {
            throw new DeadlineExceededException(String.format("No response from master for more than %dms during the stream!", mResponseTimeoutMs));
        }
        Instant end = Instant.now();
        LOG.debug("Worker {} - master ACK received in {}ms, sending the next batch {}", mWorkerId, Duration.between(start, end).toMillis(), iter);
        // Send the request
        RegisterWorkerPRequest request = next();
        mWorkerRequestObserver.onNext(request);
        // A finish latch at zero while batches are still being sent is treated as a failure
        // (see the comment after the loop); abort() is expected to throw -- TODO confirm
        if (mFinishLatch.getCount() == 0) {
            abort();
        }
        iter++;
    }
    // If the master side is closed before the client side, there is a problem
    if (mFinishLatch.getCount() == 0) {
        abort();
    }
    // Wait until all batches have been ACK-ed by the master before completing the client side.
    // The timeout is the per-response timeout scaled by MAX_BATCHES_IN_FLIGHT.
    if (!mAckLatch.await(mResponseTimeoutMs * MAX_BATCHES_IN_FLIGHT, TimeUnit.MILLISECONDS)) {
        // ACKs received so far = total batch count minus the ACKs still outstanding on the latch
        long receivedCount = mBlockMapIterator.getBatchCount() - mAckLatch.getCount();
        throw new DeadlineExceededException(String.format("All batches have been sent to the master but only received %d ACKs!", receivedCount));
    }
    LOG.info("Worker {} - All requests have been sent. Completing the client side.", mWorkerId);
    mWorkerRequestObserver.onCompleted();
    LOG.info("Worker {} - Waiting on the master side to complete", mWorkerId);
    if (!mFinishLatch.await(mCompleteTimeoutMs, TimeUnit.MILLISECONDS)) {
        throw new DeadlineExceededException(String.format("All batches have been received by the master but the master failed" + " to complete the registration in %dms!", mCompleteTimeoutMs));
    }
    // If the master failed in completing the request, there will also be an error
    if (mError.get() != null) {
        Throwable t = mError.get();
        LOG.error("Worker {} - Received an error from the master on completion", mWorkerId, t);
        throw new InternalException(t);
    }
    LOG.info("Worker {} - Finished registration with a stream", mWorkerId);
}
Also used : Instant(java.time.Instant) DeadlineExceededException(alluxio.exception.status.DeadlineExceededException) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) InternalException(alluxio.exception.status.InternalException)

Example 23 with RegisterWorkerPRequest

use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.

the class BlockMasterClient method register.

/**
 * Registers the worker with the block master by sending one {@code RegisterWorkerPRequest}
 * that carries the worker's tiers, capacity, usage, blocks, lost storage and configuration.
 *
 * @param workerId the worker id of the worker registering
 * @param storageTierAliases a list of storage tier aliases in ordinal order
 * @param totalBytesOnTiers mapping from storage tier alias to total bytes
 * @param usedBytesOnTiers mapping from storage tier alias to used bytes
 * @param currentBlocksOnLocation mapping from block store location to the list of block ids
 *        at that location
 * @param lostStorage mapping from storage tier alias to the list of lost storage paths
 * @param configList a list of configurations
 * @throws IOException declared by this method; presumably surfaced by {@code retryRPC} when
 *         the call to the master fails -- confirm against its definition
 */
// TODO(yupeng): rename to workerBlockReport or workerInitialize?
public void register(final long workerId, final List<String> storageTierAliases, final Map<String, Long> totalBytesOnTiers, final Map<String, Long> usedBytesOnTiers, final Map<BlockStoreLocation, List<Long>> currentBlocksOnLocation, final Map<String, List<String>> lostStorage, final List<ConfigProperty> configList) throws IOException {
    // The worker configuration travels inside the request options
    final RegisterWorkerPOptions registerOptions =
            RegisterWorkerPOptions.newBuilder().addAllConfigs(configList).build();
    final List<LocationBlockIdListEntry> blockEntries =
            convertBlockListMapToProto(currentBlocksOnLocation);
    // Wrap each tier's lost storage paths in its proto message
    final Map<String, StorageList> lostStorageByTier = lostStorage.entrySet().stream()
            .collect(Collectors.toMap(
                    Map.Entry::getKey,
                    entry -> StorageList.newBuilder().addAllStorage(entry.getValue()).build()));

    // Assemble the request field by field
    final RegisterWorkerPRequest.Builder requestBuilder = RegisterWorkerPRequest.newBuilder();
    requestBuilder.setWorkerId(workerId);
    requestBuilder.addAllStorageTiers(storageTierAliases);
    requestBuilder.putAllTotalBytesOnTiers(totalBytesOnTiers);
    requestBuilder.putAllUsedBytesOnTiers(usedBytesOnTiers);
    requestBuilder.addAllCurrentBlocks(blockEntries);
    requestBuilder.putAllLostStorage(lostStorageByTier);
    requestBuilder.setOptions(registerOptions);
    final RegisterWorkerPRequest registerRequest = requestBuilder.build();

    // The RPC result is not used, so the callable returns null
    retryRPC(() -> {
        mClient.registerWorker(registerRequest);
        return null;
    }, LOG, "Register", "workerId=%d", workerId);
}
Also used : WorkerNetAddress(alluxio.wire.WorkerNetAddress) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) PropertyKey(alluxio.conf.PropertyKey) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) BlockMasterWorkerServiceGrpc(alluxio.grpc.BlockMasterWorkerServiceGrpc) BlockHeartbeatPOptions(alluxio.grpc.BlockHeartbeatPOptions) Constants(alluxio.Constants) GetRegisterLeasePRequest(alluxio.grpc.GetRegisterLeasePRequest) GrpcUtils(alluxio.grpc.GrpcUtils) GetWorkerIdPRequest(alluxio.grpc.GetWorkerIdPRequest) Map(java.util.Map) BlockHeartbeatPRequest(alluxio.grpc.BlockHeartbeatPRequest) LocationBlockIdListEntry(alluxio.grpc.LocationBlockIdListEntry) AbstractMasterClient(alluxio.AbstractMasterClient) Metric(alluxio.grpc.Metric) RetryPolicy(alluxio.retry.RetryPolicy) BlockIdList(alluxio.grpc.BlockIdList) CommitBlockInUfsPRequest(alluxio.grpc.CommitBlockInUfsPRequest) FailedToAcquireRegisterLeaseException(alluxio.exception.FailedToAcquireRegisterLeaseException) Logger(org.slf4j.Logger) BlockStoreLocationProto(alluxio.grpc.BlockStoreLocationProto) GetRegisterLeasePResponse(alluxio.grpc.GetRegisterLeasePResponse) IOException(java.io.IOException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) ConfigProperty(alluxio.grpc.ConfigProperty) Command(alluxio.grpc.Command) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) RegisterWorkerPOptions(alluxio.grpc.RegisterWorkerPOptions) MasterClientContext(alluxio.master.MasterClientContext) CommitBlockPRequest(alluxio.grpc.CommitBlockPRequest) ServiceType(alluxio.grpc.ServiceType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) StorageList(alluxio.grpc.StorageList)

Example 24 with RegisterWorkerPRequest

use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.

the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorker.

/**
 * Registers a worker and then sends the same request stream a second time.
 * This can happen when a worker process is restarted.
 */
@Test
public void registerExistingWorker() throws Exception {
    long existingWorkerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);
    List<RegisterWorkerPRequest> chunks =
            RegisterStreamTestUtils.generateRegisterStreamForWorker(existingWorkerId);
    prepareBlocksOnMaster(chunks);

    // First registration must go through without errors
    Queue<Throwable> firstAttemptErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(
            chunks, RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstAttemptErrors));
    assertEquals(0, firstAttemptErrors.size());
    // The master now knows exactly one worker
    assertEquals(1, mBlockMaster.getWorkerCount());

    // Second registration with the identical stream
    Queue<Throwable> secondAttemptErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(
            chunks, RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondAttemptErrors));
    assertEquals(0, secondAttemptErrors.size());

    // The worker is still registered, with the same blocks and nothing scheduled for removal
    MasterWorkerInfo workerAfterReregister = mBlockMaster.getWorker(existingWorkerId);
    assertEquals(TIER_BLOCK_TOTAL, workerAfterReregister.getBlockCount());
    assertEquals(0, workerAfterReregister.getToRemoveBlockCount());
    assertEquals(1, mBlockMaster.getWorkerCount());

    // Verify the worker is readable and writable
    verifyWorkerWritable(existingWorkerId);
}
Also used : MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 25 with RegisterWorkerPRequest

use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.

the class BlockMasterRegisterStreamIntegrationTest method registerExistingWorkerBlocksAdded.

/**
 * Registers a worker, then registers it again with a request stream that contains some
 * additional blocks, and checks that the master reflects the larger block count.
 */
@Test
public void registerExistingWorkerBlocksAdded() throws Exception {
    long existingWorkerId = mBlockMaster.getWorkerId(NET_ADDRESS_1);

    // First registration
    List<RegisterWorkerPRequest> initialChunks =
            RegisterStreamTestUtils.generateRegisterStreamForWorker(existingWorkerId);
    prepareBlocksOnMaster(initialChunks);
    Queue<Throwable> firstAttemptErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(
            initialChunks,
            RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstAttemptErrors));
    assertEquals(0, firstAttemptErrors.size());

    // The worker is known to the master with the expected blocks
    assertEquals(1, mBlockMaster.getWorkerCount());
    MasterWorkerInfo workerBefore = mBlockMaster.getWorker(existingWorkerId);
    assertEquals(TIER_BLOCK_TOTAL, workerBefore.getBlockCount());
    assertEquals(0, workerBefore.getToRemoveBlockCount());

    // Generate block IDs in the same way but add some more
    Map<BlockStoreLocation, List<Long>> blocksByLocation =
            RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(TIER_CONFIG));
    Set<Long> extraBlocks = addSomeBlocks(blocksByLocation);
    // Make the master accept these blocks
    prepareBlocksOnMaster(extraBlocks);

    // Build a fresh request stream from the enlarged block map
    List<String> aliases = getTierAliases(parseTierConfig(TIER_CONFIG));
    Map<String, Long> capacityByTier = Maps.toMap(aliases, tier -> CAPACITY);
    Map<String, Long> usedByTier = Maps.toMap(aliases, tier -> 0L);
    RegisterStreamer enlargedStreamer = new RegisterStreamer(
            null,
            existingWorkerId,
            aliases,
            capacityByTier,
            usedByTier,
            blocksByLocation,
            LOST_STORAGE,
            EMPTY_CONFIG);
    List<RegisterWorkerPRequest> enlargedChunks = ImmutableList.copyOf(enlargedStreamer);
    // The batch count is the total block count divided by the batch size, rounded up
    int expectedChunkCount =
            (int) Math.ceil((TIER_BLOCK_TOTAL + extraBlocks.size()) / (double) BATCH_SIZE);
    assertEquals(expectedChunkCount, enlargedChunks.size());

    // Register again with the new request stream
    Queue<Throwable> secondAttemptErrors = new ConcurrentLinkedQueue<>();
    sendStreamToMaster(
            enlargedChunks,
            RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondAttemptErrors));
    assertEquals(0, secondAttemptErrors.size());

    // Still one worker, now holding the original plus the added blocks
    assertEquals(1, mBlockMaster.getWorkerCount());
    MasterWorkerInfo workerAfter = mBlockMaster.getWorker(existingWorkerId);
    assertEquals(TIER_BLOCK_TOTAL + extraBlocks.size(), workerAfter.getBlockCount());
    assertEquals(0, workerAfter.getToRemoveBlockCount());
    // No command from the master because the update is received during registration
    assertEquals(EMPTY_CMD, sendHeartbeatToMaster(existingWorkerId));
    // Verify the worker is readable and writable
    verifyWorkerWritable(existingWorkerId);
}
Also used : RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) RegisterStreamer(alluxio.worker.block.RegisterStreamer) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) StorageList(alluxio.grpc.StorageList) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) BlockStoreLocation(alluxio.worker.block.BlockStoreLocation) Test(org.junit.Test)

Aggregations

RegisterWorkerPRequest (alluxio.grpc.RegisterWorkerPRequest) 30, Test (org.junit.Test) 22, ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 15, LocationBlockIdListEntry (alluxio.grpc.LocationBlockIdListEntry) 9, BlockStoreLocation (alluxio.worker.block.BlockStoreLocation) 9, MasterWorkerInfo (alluxio.master.block.meta.MasterWorkerInfo) 8, StorageList (alluxio.grpc.StorageList) 6, List (java.util.List) 6, BlockIdList (alluxio.grpc.BlockIdList) 5, BlockStoreLocationProto (alluxio.grpc.BlockStoreLocationProto) 5, RegisterWorkerPResponse (alluxio.grpc.RegisterWorkerPResponse) 5, RegisterStreamer (alluxio.worker.block.RegisterStreamer) 5, ImmutableList (com.google.common.collect.ImmutableList) 5, IOException (java.io.IOException) 5, Command (alluxio.grpc.Command) 4, StreamObserver (io.grpc.stub.StreamObserver) 4, BlockInfoException (alluxio.exception.BlockInfoException) 3, UnavailableException (alluxio.exception.status.UnavailableException) 3, GetRegisterLeasePRequest (alluxio.grpc.GetRegisterLeasePRequest) 3, BlockMasterWorkerServiceHandler (alluxio.master.block.BlockMasterWorkerServiceHandler) 3