Use of alluxio.job.SelectExecutorsContext in project alluxio by Alluxio.
Class StressBenchDefinition, method selectExecutors.
/**
 * Selects the job workers that will run the stress benchmark and builds the arguments for each.
 *
 * <p>A copy of the worker list is ordered by host name. The cluster limit read from the config
 * decides how many workers are used: {@code 0} selects every worker, a positive value takes that
 * many from the start of the ordering, and a negative value takes that many from the end. A limit
 * whose magnitude exceeds the number of available workers is capped at the number available (with
 * a warning) instead of failing with an {@link IndexOutOfBoundsException}.
 *
 * @param config the stress bench configuration, read for its cluster limit
 * @param jobWorkerInfoList the available job workers; this list itself is not modified
 * @param context the select executors context (not used by this method)
 * @return one pair per selected worker, holding the worker and its task arguments
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<String>>> selectExecutors(StressBenchConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) {
  Set<Pair<WorkerInfo, ArrayList<String>>> result = Sets.newHashSet();
  // Sort a copy of the workers by host name.
  List<WorkerInfo> workerList = Lists.newArrayList(jobWorkerInfoList);
  workerList.sort(Comparator.comparing(w -> w.getAddress().getHost()));

  int clusterLimit = config.getClusterLimit();
  if (clusterLimit < 0) {
    // A negative limit selects from the end of the ordering.
    Collections.reverse(workerList);
  }
  int available = workerList.size();
  // Widen before taking the magnitude so that Integer.MIN_VALUE cannot overflow.
  long requested = clusterLimit == 0 ? available : Math.abs((long) clusterLimit);
  if (requested > available) {
    LOG.warn("Cluster limit {} exceeds the {} available job workers; using all of them", clusterLimit, available);
  }
  // Take the first subset, according to the (capped) cluster limit.
  workerList = workerList.subList(0, (int) Math.min(requested, available));

  for (WorkerInfo worker : workerList) {
    LOG.info("Generating job for worker {}", worker.getId());
    ArrayList<String> args = new ArrayList<>(2);
    // Add the worker hostname + worker id as the unique task id for each distributed task.
    // The worker id is used since there may be multiple workers on a single host.
    args.add(BaseParameters.ID_FLAG);
    args.add(worker.getAddress().getHost() + "-" + worker.getId());
    result.add(new Pair<>(worker, args));
  }
  return result;
}
Use of alluxio.job.SelectExecutorsContext in project alluxio by Alluxio.
Class LoadDefinition, method selectExecutors.
/**
 * Assigns the blocks of the configured file to job workers so that each block can be loaded up
 * to the configured replication.
 *
 * <p>Candidate block workers are the cached workers from the file system context that have a job
 * worker on the same host and that pass the config's worker-set / locality include and exclude
 * filters. For every block of the file, the needed number of replicas is the configured
 * replication minus the block's current location count; that many candidates which do not yet
 * hold the block are picked at random. The per-job-worker task lists are then split with
 * {@code CommonUtils.partition(..., JOBS_PER_WORKER)} and each non-empty part becomes one entry
 * of the result.
 *
 * @param config the load configuration (file path, replication, worker and locality filters)
 * @param jobWorkerInfoList the available job workers
 * @param context the context providing the file system and the cached block workers
 * @return pairs of a job worker and the load tasks assigned to it
 * @throws FailedPreconditionException if fewer eligible workers without the block exist than
 *         replicas are needed for some block
 * @throws Exception if the file status lookup or a helper call fails
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<LoadTask>>> selectExecutors(LoadConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) throws Exception {
  // Several job workers may share a host; keep the first one seen instead of letting the
  // two-argument toMap throw IllegalStateException on the duplicate key.
  Map<String, WorkerInfo> jobWorkersByAddress = jobWorkerInfoList.stream().collect(Collectors.toMap(info -> info.getAddress().getHost(), info -> info, (first, second) -> first));
  // Filter out workers which have no local job worker available.
  List<String> missingJobWorkerHosts = new ArrayList<>();
  List<BlockWorkerInfo> workers = new ArrayList<>();
  for (BlockWorkerInfo worker : context.getFsContext().getCachedWorkers()) {
    String host = worker.getNetAddress().getHost();
    if (!jobWorkersByAddress.containsKey(host)) {
      LOG.warn("Worker on host {} has no local job worker", host);
      missingJobWorkerHosts.add(host);
      continue;
    }
    // The config sets are compared against upper-cased values.
    String workerHost = host.toUpperCase();
    if (!isEmptySet(config.getExcludedWorkerSet()) && config.getExcludedWorkerSet().contains(workerHost)) {
      continue;
    }
    // If specified the locality id, the candidate worker must match one at least
    boolean match = false;
    if (worker.getNetAddress().getTieredIdentity().getTiers() != null) {
      if (!(isEmptySet(config.getLocalityIds()) && isEmptySet(config.getExcludedLocalityIds()))) {
        boolean exclude = false;
        // NOTE(review): the scan stops at the first tier that is either excluded or matched,
        // so a match on an earlier tier hides an exclusion on a later tier - confirm intended.
        for (LocalityTier tier : worker.getNetAddress().getTieredIdentity().getTiers()) {
          String tierValue = tier.getValue().toUpperCase();
          if (!isEmptySet(config.getExcludedLocalityIds()) && config.getExcludedLocalityIds().contains(tierValue)) {
            exclude = true;
            break;
          }
          if (!isEmptySet(config.getLocalityIds()) && config.getLocalityIds().contains(tierValue)) {
            match = true;
            break;
          }
        }
        if (exclude) {
          continue;
        }
      }
    }
    // Or user specified neither worker-set nor locality id
    if ((isEmptySet(config.getWorkerSet()) && isEmptySet(config.getLocalityIds())) || match || (!isEmptySet(config.getWorkerSet()) && config.getWorkerSet().contains(workerHost))) {
      workers.add(worker);
    }
  }
  // Mapping from worker to block ids which that worker is supposed to load.
  Multimap<WorkerInfo, LoadTask> assignments = LinkedListMultimap.create();
  AlluxioURI uri = new AlluxioURI(config.getFilePath());
  for (FileBlockInfo blockInfo : context.getFileSystem().getStatus(uri).getFileBlockInfos()) {
    List<BlockWorkerInfo> workersWithoutBlock = getWorkersWithoutBlock(workers, blockInfo);
    int neededReplicas = config.getReplication() - blockInfo.getBlockInfo().getLocations().size();
    if (workersWithoutBlock.size() < neededReplicas) {
      String missingJobWorkersMessage = "";
      if (!missingJobWorkerHosts.isEmpty()) {
        missingJobWorkersMessage = ". The following workers could not be used because they have no local job workers: " + missingJobWorkerHosts;
      }
      // Append the host list after formatting so that a '%' in a host name can never be
      // interpreted as a format specifier.
      String message = String.format("Failed to find enough block workers to replicate to. Needed %s but only found %s. Available workers without the block: %s", neededReplicas, workersWithoutBlock.size(), workersWithoutBlock) + missingJobWorkersMessage;
      throw new FailedPreconditionException(message);
    }
    // Pick the target workers at random among those that do not hold the block yet.
    Collections.shuffle(workersWithoutBlock);
    for (int i = 0; i < neededReplicas; i++) {
      BlockWorkerInfo target = workersWithoutBlock.get(i);
      WorkerInfo jobWorker = jobWorkersByAddress.get(target.getNetAddress().getHost());
      assignments.put(jobWorker, new LoadTask(blockInfo.getBlockInfo().getBlockId(), target.getNetAddress()));
    }
  }
  Set<Pair<WorkerInfo, ArrayList<LoadTask>>> result = Sets.newHashSet();
  for (Map.Entry<WorkerInfo, Collection<LoadTask>> assignment : assignments.asMap().entrySet()) {
    Collection<LoadTask> loadTasks = assignment.getValue();
    List<List<LoadTask>> partitionedTasks = CommonUtils.partition(Lists.newArrayList(loadTasks), JOBS_PER_WORKER);
    for (List<LoadTask> tasks : partitionedTasks) {
      // Empty parts carry no work, so they are not turned into tasks.
      if (!tasks.isEmpty()) {
        result.add(new Pair<>(assignment.getKey(), Lists.newArrayList(tasks)));
      }
    }
  }
  return result;
}
Use of alluxio.job.SelectExecutorsContext in project alluxio by Alluxio.
Class ReplicateDefinitionTest, method selectExecutorsTestHelper.
/**
 * Runs executor selection on a fresh {@link ReplicateDefinition} for the test path and block.
 *
 * @param numReplicas how many replicas to replicate or evict
 * @param workerInfoList a list of current available job workers
 * @return the selection result
 * @throws Exception if the selection fails
 */
private Set<Pair<WorkerInfo, SerializableVoid>> selectExecutorsTestHelper(int numReplicas, List<WorkerInfo> workerInfoList) throws Exception {
  ReplicateConfig replicateConfig = new ReplicateConfig(TEST_PATH, TEST_BLOCK_ID, numReplicas);
  ReplicateDefinition replicateDefinition = new ReplicateDefinition();
  // The context is built with job id 1 on top of the mocked job server context.
  SelectExecutorsContext selectContext = new SelectExecutorsContext(1, mMockJobServerContext);
  return replicateDefinition.selectExecutors(replicateConfig, workerInfoList, selectContext);
}
Use of alluxio.job.SelectExecutorsContext in project alluxio by Alluxio.
Class CompactDefinitionSelectExecutorsTest, method testExecutorsParallel.
/**
 * Checks that compacting 5000 input files into 100 output files yields 10 result entries per
 * job worker and 100 compact tasks in total.
 */
@Test
public void testExecutorsParallel() throws Exception {
  int tasksPerWorker = 10;
  int numCompactedFiles = 100;
  int totalFiles = 5000;

  // Both the input and the output partition report CSV as their format.
  PartitionInfo mockPartitionInfo = mock(PartitionInfo.class);
  when(mockPartitionInfo.getFormat(any())).thenReturn(Format.CSV);
  CompactConfig config = new CompactConfig(mockPartitionInfo, INPUT_DIR, mockPartitionInfo, OUTPUT_DIR, numCompactedFiles, 2 * FileUtils.ONE_GB);

  // Listing the input directory returns files named "0" .. "4999".
  List<URIStatus> inputFiles = new ArrayList<>();
  for (int fileIndex = 0; fileIndex < totalFiles; fileIndex++) {
    inputFiles.add(newFile(Integer.toString(fileIndex)));
  }
  when(mMockFileSystem.listStatus(new AlluxioURI(INPUT_DIR))).thenReturn(inputFiles);

  CompactDefinition definition = new CompactDefinition();
  JobServerContext serverContext = new JobServerContext(mMockFileSystem, mMockFileSystemContext, mMockUfsManager);
  SelectExecutorsContext selectContext = new SelectExecutorsContext(1, serverContext);
  Set<Pair<WorkerInfo, ArrayList<CompactTask>>> result = definition.selectExecutors(config, SelectExecutorsTest.JOB_WORKERS, selectContext);

  assertEquals(JOB_WORKERS.size() * tasksPerWorker, result.size());
  // Across all entries, the number of compact tasks equals the number of output files.
  int allCompactTasks = result.stream().mapToInt(entry -> entry.getSecond().size()).sum();
  assertEquals(numCompactedFiles, allCompactTasks);
}
Use of alluxio.job.SelectExecutorsContext in project alluxio by Alluxio.
Class LoadDefinitionTest, method loadedBySpecifiedHost.
/**
 * Loads a 10-block file that has no block locations with replication 1 under the given
 * worker/locality filters, then checks that every assignment went to one of the expected
 * workers and that all blocks are loaded.
 *
 * @param workerSet the worker set passed to the load config
 * @param excludedWorkerSet the excluded worker set passed to the load config
 * @param localityIds the locality ids passed to the load config
 * @param excludedLocalityIds the excluded locality ids passed to the load config
 * @param workerIds ids of the job workers that are allowed to receive assignments
 */
private void loadedBySpecifiedHost(Set<String> workerSet, Set<String> excludedWorkerSet, Set<String> localityIds, Set<String> excludedLocalityIds, Set<Long> workerIds) throws Exception {
  final int numBlocks = 10;
  createFileWithNoLocations(TEST_URI, numBlocks);
  LoadConfig loadConfig = new LoadConfig(TEST_URI, 1, workerSet, excludedWorkerSet, localityIds, excludedLocalityIds, false);
  LoadDefinition loadDefinition = new LoadDefinition();
  SelectExecutorsContext selectContext = new SelectExecutorsContext(1, mJobServerContext);
  Set<Pair<WorkerInfo, ArrayList<LoadTask>>> assignments = loadDefinition.selectExecutors(loadConfig, JOB_WORKERS, selectContext);

  // Every assignment must target an expected worker; count the block loads along the way.
  int loadedBlocks = 0;
  for (Pair<WorkerInfo, ArrayList<LoadTask>> entry : assignments) {
    loadedBlocks += entry.getSecond().size();
    Assert.assertTrue(workerIds.contains(entry.getFirst().getId()));
  }
  // Check that we are loading the right number of blocks.
  Assert.assertEquals(numBlocks, loadedBlocks);
}
Aggregations