use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.
the class CompactDefinition method selectExecutors.
/**
 * Groups the files found under {@code config.getInput()} and assigns each group, as one
 * {@link CompactTask}, to a job worker in round-robin order.
 *
 * <p>Files rejected by {@code shouldIgnore} are skipped. The target group size starts at
 * {@code config.getMinFileSize()}, is scaled by the {@code COMPRESSION_RATIO} entry for the input
 * format when partition info is available, and is raised when the configured minimum would
 * produce more than {@code config.getMaxNumFiles()} groups.
 *
 * @param config the compaction configuration (input, output, size and count limits)
 * @param jobWorkers the available job workers; must not be empty
 * @param context provides the file system used to list the input directory
 * @return pairs of job worker and task batch; each worker's tasks are split with
 *         {@code CommonUtils.partition(..., TASKS_PER_WORKER)} and empty batches are dropped
 * @throws Exception if listing the input directory fails
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<CompactTask>>> selectExecutors(CompactConfig config, List<WorkerInfo> jobWorkers, SelectExecutorsContext context) throws Exception {
  Preconditions.checkState(!jobWorkers.isEmpty(), "No job worker");
  AlluxioURI inputDir = new AlluxioURI(config.getInput());
  AlluxioURI outputDir = new AlluxioURI(config.getOutput());

  // Gather the files to compact. The running total is a double to prevent overflow.
  List<URIStatus> files = Lists.newArrayList();
  double totalFileSize = 0;
  for (URIStatus status : context.getFileSystem().listStatus(inputDir)) {
    if (shouldIgnore(status)) {
      continue;
    }
    files.add(status);
    totalFileSize += status.getLength();
  }

  Map<WorkerInfo, ArrayList<CompactTask>> assignments = Maps.newHashMap();
  int maxNumFiles = config.getMaxNumFiles();
  long groupMinSize = config.getMinFileSize();
  if (!files.isEmpty() && config.getInputPartitionInfo() != null) {
    // Scale the minimum group size by the compression ratio of the source format.
    groupMinSize *= COMPRESSION_RATIO.get(config.getInputPartitionInfo().getFormat(files.get(0).getName()));
  }
  if (totalFileSize / groupMinSize > maxNumFiles) {
    // The configured minimum would yield too many groups, so enlarge the groups instead.
    groupMinSize = Math.round(totalFileSize / maxNumFiles);
  }
  long halfGroupMinSize = groupMinSize / 2;

  // One group of files becomes one output file and one task; tasks are handed to the
  // workers in turn. A group keeps growing until adding another file would make it too big.
  ArrayList<String> group = new ArrayList<>();
  int workerIndex = 0;
  int outputIndex = 0;
  // Number of groups that have already been closed.
  int groupIndex = 0;
  long currentGroupSize = 0;
  for (URIStatus file : files) {
    long fileLength = file.getLength();
    long sizeWithFile = currentGroupSize + fileLength;
    // The file joins the current group when any of these holds:
    //   - the group is still empty,
    //   - this is the last group allowed by maxNumFiles,
    //   - the group would still be at most half of the target size,
    //   - the size with the file is at least as close to the target as the size without it.
    boolean joinsCurrentGroup = group.isEmpty()
        || groupIndex == maxNumFiles - 1
        || sizeWithFile <= halfGroupMinSize
        || Math.abs(sizeWithFile - groupMinSize) <= Math.abs(currentGroupSize - groupMinSize);
    if (!joinsCurrentGroup) {
      // Close the current group, give it to the next worker, and start a new group.
      WorkerInfo worker = jobWorkers.get(workerIndex);
      workerIndex = (workerIndex + 1) % jobWorkers.size();
      assignments.computeIfAbsent(worker, unused -> new ArrayList<>())
          .add(new CompactTask(group, getOutputPath(outputDir, outputIndex++)));
      group = new ArrayList<>();
      currentGroupSize = 0;
      groupIndex++;
    }
    group.add(inputDir.join(file.getName()).toString());
    currentGroupSize += fileLength;
  }

  // Whatever remains forms the last group.
  if (!group.isEmpty()) {
    WorkerInfo lastWorker = jobWorkers.get(workerIndex);
    assignments.computeIfAbsent(lastWorker, unused -> new ArrayList<>())
        .add(new CompactTask(group, getOutputPath(outputDir, outputIndex)));
  }

  // Split each worker's tasks into batches and emit one pair per non-empty batch.
  Set<Pair<WorkerInfo, ArrayList<CompactTask>>> result = Sets.newHashSet();
  for (Map.Entry<WorkerInfo, ArrayList<CompactTask>> entry : assignments.entrySet()) {
    WorkerInfo worker = entry.getKey();
    for (List<CompactTask> batch : CommonUtils.partition(entry.getValue(), TASKS_PER_WORKER)) {
      if (batch.isEmpty()) {
        continue;
      }
      result.add(new Pair<>(worker, Lists.newArrayList(batch)));
    }
  }
  return result;
}
use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.
the class LoadDefinition method selectExecutors.
/**
 * Decides which job workers load which blocks of the file at {@code config.getFilePath()}.
 *
 * <p>A cached block worker is a candidate only if a job worker is registered on the same host
 * and it passes the worker-set / locality filters from {@code config}. For every block of the
 * file, {@code config.getReplication()} minus the number of current block locations candidates
 * are picked at random from those returned by {@code getWorkersWithoutBlock}, and a
 * {@link LoadTask} is recorded for the job worker on the chosen host.
 *
 * @param config the load configuration (file path, replication, worker and locality filters)
 * @param jobWorkerInfoList the available job workers
 * @param context provides the file system context (cached block workers) and the file system
 * @return pairs of job worker and task batch; each worker's tasks are split with
 *         {@code CommonUtils.partition(..., JOBS_PER_WORKER)} and empty batches are dropped
 * @throws FailedPreconditionException if, for some block, fewer candidate workers lack the
 *         block than the number of replicas still needed
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<LoadTask>>> selectExecutors(LoadConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) throws Exception {
// Index the job workers by host name so block workers can be matched to them.
// NOTE(review): Collectors.toMap without a merge function throws IllegalStateException on
// duplicate keys, so two job workers reporting the same host would fail here - confirm this
// cannot happen.
Map<String, WorkerInfo> jobWorkersByAddress = jobWorkerInfoList.stream().collect(Collectors.toMap(info -> info.getAddress().getHost(), info -> info));
// Filter out workers which have no local job worker available.
// The hosts of such workers are remembered only to enrich the error message below.
List<String> missingJobWorkerHosts = new ArrayList<>();
List<BlockWorkerInfo> workers = new ArrayList<>();
for (BlockWorkerInfo worker : context.getFsContext().getCachedWorkers()) {
if (jobWorkersByAddress.containsKey(worker.getNetAddress().getHost())) {
// The upper-cased host is what gets compared against the configured worker sets.
// NOTE(review): toUpperCase() uses the default locale; presumably the configured sets are
// normalized the same way - confirm.
String workerHost = worker.getNetAddress().getHost().toUpperCase();
// Explicitly excluded workers are never candidates.
if (!isEmptySet(config.getExcludedWorkerSet()) && config.getExcludedWorkerSet().contains(workerHost)) {
continue;
}
// If locality ids are specified, a candidate worker must match at least one of them.
boolean match = false;
if (worker.getNetAddress().getTieredIdentity().getTiers() != null) {
// Tiers are inspected only when an include or exclude locality filter is configured.
if (!(isEmptySet(config.getLocalityIds()) && isEmptySet(config.getExcludedLocalityIds()))) {
boolean exclude = false;
// The scan stops at the first tier that hits either filter, so later tiers are not examined.
for (LocalityTier tier : worker.getNetAddress().getTieredIdentity().getTiers()) {
if (!isEmptySet(config.getExcludedLocalityIds()) && config.getExcludedLocalityIds().contains(tier.getValue().toUpperCase())) {
exclude = true;
break;
}
if (!isEmptySet(config.getLocalityIds()) && config.getLocalityIds().contains(tier.getValue().toUpperCase())) {
match = true;
break;
}
}
if (exclude) {
continue;
}
}
}
// Accept the worker when the user specified neither a worker set nor locality ids,
// when a locality id matched, or when the worker set contains this host.
if ((isEmptySet(config.getWorkerSet()) && isEmptySet(config.getLocalityIds())) || match || (!isEmptySet(config.getWorkerSet()) && config.getWorkerSet().contains(workerHost))) {
workers.add(worker);
}
} else {
LOG.warn("Worker on host {} has no local job worker", worker.getNetAddress().getHost());
missingJobWorkerHosts.add(worker.getNetAddress().getHost());
}
}
// Mapping from worker to block ids which that worker is supposed to load.
Multimap<WorkerInfo, LoadTask> assignments = LinkedListMultimap.create();
AlluxioURI uri = new AlluxioURI(config.getFilePath());
for (FileBlockInfo blockInfo : context.getFileSystem().getStatus(uri).getFileBlockInfos()) {
List<BlockWorkerInfo> workersWithoutBlock = getWorkersWithoutBlock(workers, blockInfo);
// Replicas still missing for this block; when zero or negative, the loop below adds nothing.
int neededReplicas = config.getReplication() - blockInfo.getBlockInfo().getLocations().size();
if (workersWithoutBlock.size() < neededReplicas) {
String missingJobWorkersMessage = "";
if (!missingJobWorkerHosts.isEmpty()) {
missingJobWorkersMessage = ". The following workers could not be used because they have " + "no local job workers: " + missingJobWorkerHosts;
}
throw new FailedPreconditionException(String.format("Failed to find enough block workers to replicate to. Needed %s but only found %s. " + "Available workers without the block: %s" + missingJobWorkersMessage, neededReplicas, workersWithoutBlock.size(), workersWithoutBlock));
}
// Shuffle so that the first neededReplicas entries are a random choice of workers.
Collections.shuffle(workersWithoutBlock);
for (int i = 0; i < neededReplicas; i++) {
String address = workersWithoutBlock.get(i).getNetAddress().getHost();
// The task is executed by the job worker on the same host as the chosen block worker.
WorkerInfo jobWorker = jobWorkersByAddress.get(address);
assignments.put(jobWorker, new LoadTask(blockInfo.getBlockInfo().getBlockId(), workersWithoutBlock.get(i).getNetAddress()));
}
}
// Split each job worker's tasks into batches and emit one pair per non-empty batch.
Set<Pair<WorkerInfo, ArrayList<LoadTask>>> result = Sets.newHashSet();
for (Map.Entry<WorkerInfo, Collection<LoadTask>> assignment : assignments.asMap().entrySet()) {
Collection<LoadTask> loadTasks = assignment.getValue();
List<List<LoadTask>> partitionedTasks = CommonUtils.partition(Lists.newArrayList(loadTasks), JOBS_PER_WORKER);
for (List<LoadTask> tasks : partitionedTasks) {
if (!tasks.isEmpty()) {
result.add(new Pair<>(assignment.getKey(), Lists.newArrayList(tasks)));
}
}
}
return result;
}
use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.
the class PersistDefinition method selectExecutors.
/**
 * Picks the single job worker that will persist the file at {@code config.getFilePath()}.
 *
 * <p>The job worker whose host equals that of the block worker returned by
 * {@code JobUtils.getWorkerWithMostBlocks} is chosen. When that helper returns {@code null} or
 * no job worker runs on that host, a job worker is chosen at random.
 *
 * @param config the persist configuration
 * @param jobWorkerInfoList the available job workers; must not be empty
 * @param context provides the cached block workers and the file system
 * @return a set holding exactly one pair of the selected job worker and a {@code null} argument
 * @throws RuntimeException if {@code jobWorkerInfoList} is empty
 * @throws Exception if the file status cannot be fetched
 */
@Override
public Set<Pair<WorkerInfo, SerializableVoid>> selectExecutors(PersistConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) throws Exception {
  if (jobWorkerInfoList.isEmpty()) {
    throw new RuntimeException("No worker is available");
  }

  AlluxioURI uri = new AlluxioURI(config.getFilePath());
  List<BlockWorkerInfo> blockWorkers = context.getFsContext().getCachedWorkers();
  BlockWorkerInfo preferred = JobUtils.getWorkerWithMostBlocks(blockWorkers, context.getFileSystem().getStatus(uri).getFileBlockInfos());

  Set<Pair<WorkerInfo, SerializableVoid>> result = Sets.newHashSet();

  // Map the preferred block worker to the job worker running on the same host.
  if (preferred != null) {
    for (WorkerInfo candidate : jobWorkerInfoList) {
      if (candidate.getAddress().getHost().equals(preferred.getNetAddress().getHost())) {
        result.add(new Pair<>(candidate, null));
        return result;
      }
    }
  }

  // No co-located job worker was found, so fall back to a random one.
  int fallbackIndex = new Random().nextInt(jobWorkerInfoList.size());
  result.add(new Pair<>(jobWorkerInfoList.get(fallbackIndex), null));
  return result;
}
use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.
the class MoveDefinition method selectExecutors.
/**
 * Selects the job worker that runs on the host named by {@code config.getWorkerHost()}.
 *
 * <p>The candidates are visited in random order and the first one whose host equals the
 * configured host is returned. The shuffle is done on a private copy, so the caller's list is
 * neither reordered nor required to be modifiable.
 *
 * @param config the move configuration carrying the target worker host
 * @param jobWorkerInfoList the available job workers; must not be empty
 * @param context the selection context (not used here)
 * @return a set with one pair of the matching job worker and a {@code null} argument, or an
 *         empty set when no job worker runs on the configured host
 * @throws IllegalArgumentException if {@code jobWorkerInfoList} is empty
 */
@Override
public Set<Pair<WorkerInfo, SerializableVoid>> selectExecutors(MoveConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) {
  Preconditions.checkArgument(!jobWorkerInfoList.isEmpty(), "No worker is available");
  String workerHost = config.getWorkerHost();
  Set<Pair<WorkerInfo, SerializableVoid>> result = Sets.newHashSet();
  // Collections.shuffle permutes its argument in place and throws
  // UnsupportedOperationException for unmodifiable lists, so shuffle a copy instead of the
  // list owned by the caller.
  List<WorkerInfo> candidates = new java.util.ArrayList<>(jobWorkerInfoList);
  Collections.shuffle(candidates);
  for (WorkerInfo workerInfo : candidates) {
    // Select the job worker located on the configured worker host.
    if (workerHost.equals(workerInfo.getAddress().getHost())) {
      result.add(new Pair<>(workerInfo, null));
      return result;
    }
  }
  return result;
}
use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.
the class BlockMasterRegisterStreamIntegrationTest method verifyWorkerWritable.
/**
 * Verifies the worker is writable by committing a new block to it and then checking that the
 * block is reported on that worker.
 *
 * @param workerId the id of the worker to check
 * @throws Exception if the commit or the verification fails
 */
private void verifyWorkerWritable(long workerId) throws Exception {
  // The block to commit.
  final long blockId = 999L;
  final long blockSize = 1024L;

  // Read the current usage first; the commit reports it increased by the new block's size.
  long usedBefore = mBlockMaster.getWorker(workerId).getUsedBytes();
  mBlockMaster.commitBlock(workerId, usedBefore + blockSize, "MEM", "MEM", blockId, blockSize);

  // Look the worker up in the master's worker list and verify the block against it.
  WorkerInfo target = BlockMasterTestUtils.findWorkerInfo(mBlockMaster.getWorkerInfoList(), workerId);
  BlockMasterTestUtils.verifyBlockOnWorkers(mBlockMaster, blockId, blockSize, ImmutableList.of(target));
}
Aggregations