Use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.
Class RegisterStreamTestUtils, method generateRegisterStreamForEmptyWorker.
/**
 * Builds the register request stream for a worker that uses an empty tier configuration and
 * asserts that the stream consists of exactly one request.
 *
 * @param workerId the worker id to put into the generated requests
 * @return the generated request chunks (asserted to contain a single element)
 */
public static List<RegisterWorkerPRequest> generateRegisterStreamForEmptyWorker(long workerId) {
  // An empty tier config string is parsed and handed to the block id generator
  final String emptyTierConfig = "";
  final Map<BlockStoreLocation, List<Long>> blocksByLocation =
      RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(emptyTierConfig));

  final RegisterStreamer streamer = new RegisterStreamer(
      null,
      workerId,
      ImmutableList.of("MEM"),
      MEM_CAPACITY,
      MEM_USAGE_EMPTY,
      blocksByLocation,
      LOST_STORAGE,
      EMPTY_CONFIG);

  // Drain the streamer into an immutable list; an empty worker is expected to yield one request
  final List<RegisterWorkerPRequest> chunks = ImmutableList.copyOf(streamer);
  assertEquals(1, chunks.size());
  return chunks;
}
Use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.
Class RegisterStreamer, method registerInternal.
/**
 * Sends every remaining register request to the master over the request observer and then
 * waits for the master side to finish.
 *
 * Steps, in the order they appear below:
 * 1. While hasNext() is true, acquire one token from mBucket (waiting at most
 *    mResponseTimeoutMs) and send next() through mWorkerRequestObserver.
 * 2. After each send, and once more after the loop, call abort() if mFinishLatch has already
 *    reached zero.
 * 3. Await mAckLatch for up to mResponseTimeoutMs * MAX_BATCHES_IN_FLIGHT milliseconds.
 * 4. Call onCompleted() on the request observer and await mFinishLatch for up to
 *    mCompleteTimeoutMs.
 * 5. If mError holds a Throwable, log it and rethrow it wrapped in an InternalException.
 *
 * @throws InterruptedException declared by this method; presumably raised when the thread is
 *         interrupted while blocked on the token bucket or one of the latches
 * @throws DeadlineExceededException if a token is not acquired within mResponseTimeoutMs, if
 *         mAckLatch does not reach zero in time, or if mFinishLatch does not reach zero within
 *         mCompleteTimeoutMs
 * @throws CancelledException declared by this method but not thrown directly in this body;
 *         presumably raised by abort() (defined elsewhere) - TODO confirm
 * @throws InternalException if mError is non-null after mFinishLatch has reached zero
 */
private void registerInternal() throws InterruptedException, DeadlineExceededException, CancelledException, InternalException {
// Counts the batches sent so far; only used in the debug log below
int iter = 0;
while (hasNext()) {
// Send a request when the master ACKs the previous one
LOG.debug("Worker {} - Acquiring one token to send the next batch", mWorkerId);
// start/end bracket the token wait so the debug log can report how long it took
Instant start = Instant.now();
if (!mBucket.tryAcquire(mResponseTimeoutMs, TimeUnit.MILLISECONDS)) {
throw new DeadlineExceededException(String.format("No response from master for more than %dms during the stream!", mResponseTimeoutMs));
}
Instant end = Instant.now();
LOG.debug("Worker {} - master ACK received in {}ms, sending the next batch {}", mWorkerId, Duration.between(start, end).toMillis(), iter);
// Send the request
RegisterWorkerPRequest request = next();
mWorkerRequestObserver.onNext(request);
// A zero count on mFinishLatch while requests are still being sent is treated as a failure.
// NOTE(review): abort() is not visible here; presumably it throws and ends the loop - confirm
if (mFinishLatch.getCount() == 0) {
abort();
}
iter++;
}
// If the master side is closed before the client side, there is a problem
if (mFinishLatch.getCount() == 0) {
abort();
}
// Wait until all batches have been ACK-ed by the master before completing the client side
// NOTE(review): confirm the operand types of the timeout product cannot overflow
if (!mAckLatch.await(mResponseTimeoutMs * MAX_BATCHES_IN_FLIGHT, TimeUnit.MILLISECONDS)) {
// Batch count reported by the iterator minus the latch's remaining count
long receivedCount = mBlockMapIterator.getBatchCount() - mAckLatch.getCount();
throw new DeadlineExceededException(String.format("All batches have been sent to the master but only received %d ACKs!", receivedCount));
}
LOG.info("Worker {} - All requests have been sent. Completing the client side.", mWorkerId);
mWorkerRequestObserver.onCompleted();
LOG.info("Worker {} - Waiting on the master side to complete", mWorkerId);
if (!mFinishLatch.await(mCompleteTimeoutMs, TimeUnit.MILLISECONDS)) {
throw new DeadlineExceededException(String.format("All batches have been received by the master but the master failed" + " to complete the registration in %dms!", mCompleteTimeoutMs));
}
// If the master failed in completing the request, there will also be an error
if (mError.get() != null) {
Throwable t = mError.get();
LOG.error("Worker {} - Received an error from the master on completion", mWorkerId, t);
throw new InternalException(t);
}
LOG.info("Worker {} - Finished registration with a stream", mWorkerId);
}
Use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.
Class BlockMasterClient, method register.
/**
 * Registers a worker with the block master by building one {@link RegisterWorkerPRequest} from
 * the arguments and sending it through a retried RPC.
 *
 * @param workerId the id of the worker that is registering
 * @param storageTierAliases the storage tier aliases, in ordinal order
 * @param totalBytesOnTiers total bytes keyed by storage tier alias
 * @param usedBytesOnTiers used bytes keyed by storage tier alias
 * @param currentBlocksOnLocation block ids keyed by the block store location holding them
 * @param lostStorage lost storage paths keyed by storage tier alias
 * @param configList configuration properties attached to the request options
 * @throws IOException declared by this method; propagated from the retried register RPC
 */
// TODO(yupeng): rename to workerBlockReport or workerInitialize?
public void register(final long workerId, final List<String> storageTierAliases, final Map<String, Long> totalBytesOnTiers, final Map<String, Long> usedBytesOnTiers, final Map<BlockStoreLocation, List<Long>> currentBlocksOnLocation, final Map<String, List<String>> lostStorage, final List<ConfigProperty> configList) throws IOException {
  // Request options only carry the worker's configuration list
  final RegisterWorkerPOptions registerOptions =
      RegisterWorkerPOptions.newBuilder()
          .addAllConfigs(configList)
          .build();

  // Convert the in-memory block map into its proto representation
  final List<LocationBlockIdListEntry> blockEntries =
      convertBlockListMapToProto(currentBlocksOnLocation);

  // Wrap each tier's lost storage paths into a StorageList proto, keyed by the same alias
  final Map<String, StorageList> lostStorageProto =
      lostStorage.entrySet().stream()
          .collect(Collectors.toMap(
              Map.Entry::getKey,
              entry -> StorageList.newBuilder().addAllStorage(entry.getValue()).build()));

  final RegisterWorkerPRequest registerRequest =
      RegisterWorkerPRequest.newBuilder()
          .setWorkerId(workerId)
          .addAllStorageTiers(storageTierAliases)
          .putAllTotalBytesOnTiers(totalBytesOnTiers)
          .putAllUsedBytesOnTiers(usedBytesOnTiers)
          .addAllCurrentBlocks(blockEntries)
          .putAllLostStorage(lostStorageProto)
          .setOptions(registerOptions)
          .build();

  // The RPC result is not used, so the callable returns null
  retryRPC(() -> {
    mClient.registerWorker(registerRequest);
    return null;
  }, LOG, "Register", "workerId=%d", workerId);
}
Use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.
Class BlockMasterRegisterStreamIntegrationTest, method registerExistingWorker.
/**
 * Sends the same register stream for one worker twice and checks that the worker stays
 * registered with the expected block counts.
 * This can happen when a worker process is restarted.
 */
@Test
public void registerExistingWorker() throws Exception {
  final long id = mBlockMaster.getWorkerId(NET_ADDRESS_1);
  final List<RegisterWorkerPRequest> stream =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(id);
  prepareBlocksOnMaster(stream);

  // First registration: the stream must go through without any captured error
  final Queue<Throwable> firstErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(stream,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstErrors));
  assertEquals(0, firstErrors.size());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // Second registration with the identical stream
  final Queue<Throwable> secondErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(stream,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondErrors));
  assertEquals(0, secondErrors.size());

  // The worker is still the only one registered and its block counts are as expected
  final MasterWorkerInfo workerAfter = mBlockMaster.getWorker(id);
  assertEquals(TIER_BLOCK_TOTAL, workerAfter.getBlockCount());
  assertEquals(0, workerAfter.getToRemoveBlockCount());
  assertEquals(1, mBlockMaster.getWorkerCount());

  // The worker must remain readable and writable
  verifyWorkerWritable(id);
}
Use of alluxio.grpc.RegisterWorkerPRequest in project alluxio by Alluxio.
Class BlockMasterRegisterStreamIntegrationTest, method registerExistingWorkerBlocksAdded.
/**
 * Registers a worker, then registers it again with a stream that carries additional blocks,
 * and checks that the master reports the enlarged block count with nothing to remove.
 */
@Test
public void registerExistingWorkerBlocksAdded() throws Exception {
  final long id = mBlockMaster.getWorkerId(NET_ADDRESS_1);

  // Initial registration
  final List<RegisterWorkerPRequest> firstStream =
      RegisterStreamTestUtils.generateRegisterStreamForWorker(id);
  prepareBlocksOnMaster(firstStream);
  final Queue<Throwable> firstErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(firstStream,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(firstErrors));
  assertEquals(0, firstErrors.size());

  // State after the initial registration
  assertEquals(1, mBlockMaster.getWorkerCount());
  final MasterWorkerInfo workerBefore = mBlockMaster.getWorker(id);
  assertEquals(TIER_BLOCK_TOTAL, workerBefore.getBlockCount());
  assertEquals(0, workerBefore.getToRemoveBlockCount());

  // Produce the block ids the same way as before, then append extra blocks
  final Map<BlockStoreLocation, List<Long>> enlargedBlockMap =
      RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(TIER_CONFIG));
  final Set<Long> addedBlocks = addSomeBlocks(enlargedBlockMap);
  // The master has to know these blocks before it will accept them
  prepareBlocksOnMaster(addedBlocks);

  // Build a fresh request stream from the enlarged block map
  final List<String> aliases = getTierAliases(parseTierConfig(TIER_CONFIG));
  final Map<String, Long> capacityByTier = Maps.toMap(aliases, alias -> CAPACITY);
  final Map<String, Long> usedByTier = Maps.toMap(aliases, alias -> 0L);
  final RegisterStreamer secondStreamer = new RegisterStreamer(
      null, id, aliases, capacityByTier, usedByTier, enlargedBlockMap, LOST_STORAGE,
      EMPTY_CONFIG);
  final List<RegisterWorkerPRequest> secondStream = ImmutableList.copyOf(secondStreamer);
  final int expectedBatches =
      (int) Math.ceil((TIER_BLOCK_TOTAL + addedBlocks.size()) / (double) BATCH_SIZE);
  assertEquals(expectedBatches, secondStream.size());

  // Second registration, using the new stream
  final Queue<Throwable> secondErrors = new ConcurrentLinkedQueue<>();
  sendStreamToMaster(secondStream,
      RegisterStreamTestUtils.getErrorCapturingResponseObserver(secondErrors));
  assertEquals(0, secondErrors.size());

  // State after the second registration
  assertEquals(1, mBlockMaster.getWorkerCount());
  final MasterWorkerInfo workerAfter = mBlockMaster.getWorker(id);
  assertEquals(TIER_BLOCK_TOTAL + addedBlocks.size(), workerAfter.getBlockCount());
  assertEquals(0, workerAfter.getToRemoveBlockCount());

  // No command from the master because the update is received during registration
  assertEquals(EMPTY_CMD, sendHeartbeatToMaster(id));

  // The worker must remain readable and writable
  verifyWorkerWritable(id);
}
Aggregations