Search in sources :

Example 26 with WorkerInfo

use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.

the class CompactDefinition method selectExecutors.

/**
 * Groups the non-ignored files under the configured input directory into compaction tasks
 * and hands the tasks to the job workers in round-robin order.
 *
 * @param config the compaction configuration (input/output directories, size and count limits)
 * @param jobWorkers the available job workers; must not be empty
 * @param context gives access to the file system used to list the input directory
 * @return pairs of a job worker and one batch of its tasks, the batches being produced by
 *         {@code CommonUtils.partition} with {@code TASKS_PER_WORKER}
 * @throws Exception if listing the input directory fails
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<CompactTask>>> selectExecutors(CompactConfig config, List<WorkerInfo> jobWorkers, SelectExecutorsContext context) throws Exception {
    Preconditions.checkState(!jobWorkers.isEmpty(), "No job worker");
    AlluxioURI inputDir = new AlluxioURI(config.getInput());
    AlluxioURI outputDir = new AlluxioURI(config.getOutput());

    List<URIStatus> candidates = Lists.newArrayList();
    // Summed as a double so that adding up many large files cannot overflow.
    double totalBytes = 0;
    for (URIStatus status : context.getFileSystem().listStatus(inputDir)) {
        if (shouldIgnore(status)) {
            continue;
        }
        candidates.add(status);
        totalBytes += status.getLength();
    }

    Map<WorkerInfo, ArrayList<CompactTask>> tasksByWorker = Maps.newHashMap();
    int maxNumFiles = config.getMaxNumFiles();
    long targetGroupSize = config.getMinFileSize();
    if (!candidates.isEmpty() && config.getInputPartitionInfo() != null) {
        // Scale the target by the compression ratio registered for the format of the first file.
        targetGroupSize *= COMPRESSION_RATIO.get(config.getInputPartitionInfo().getFormat(candidates.get(0).getName()));
    }
    if (totalBytes / targetGroupSize > maxNumFiles) {
        // The target would yield more than maxNumFiles groups, so enlarge it.
        targetGroupSize = Math.round(totalBytes / maxNumFiles);
    }
    long halfTargetGroupSize = targetGroupSize / 2;

    // Each group of files becomes one task that compacts the group into a single output file.
    // A group keeps growing until adding the next file would move it further from the target.
    ArrayList<String> currentGroup = new ArrayList<>();
    long currentGroupBytes = 0;
    // Number of groups already closed and turned into tasks.
    int closedGroups = 0;
    int nextWorker = 0;
    int nextOutput = 0;
    for (URIStatus file : candidates) {
        long fileBytes = file.getLength();
        long bytesWithFile = currentGroupBytes + fileBytes;
        // The file joins the current group when any of these holds:
        // 1. the group is empty
        // 2. the group is the last one allowed by maxNumFiles
        // 3. the group would still be at most half the target size
        // 4. the group size with the file is at least as close to the target as without it
        boolean joinsCurrentGroup = currentGroup.isEmpty()
            || closedGroups == maxNumFiles - 1
            || bytesWithFile <= halfTargetGroupSize
            || Math.abs(bytesWithFile - targetGroupSize) <= Math.abs(currentGroupBytes - targetGroupSize);
        if (!joinsCurrentGroup) {
            // Close the current group and give it to the next worker in the rotation.
            WorkerInfo worker = jobWorkers.get(nextWorker);
            nextWorker = (nextWorker + 1) % jobWorkers.size();
            tasksByWorker.computeIfAbsent(worker, ignored -> new ArrayList<>())
                .add(new CompactTask(currentGroup, getOutputPath(outputDir, nextOutput++)));
            currentGroup = new ArrayList<>();
            currentGroupBytes = 0;
            closedGroups++;
        }
        currentGroup.add(inputDir.join(file.getName()).toString());
        currentGroupBytes += fileBytes;
    }
    // Whatever is left over forms the final group.
    if (!currentGroup.isEmpty()) {
        WorkerInfo worker = jobWorkers.get(nextWorker);
        tasksByWorker.computeIfAbsent(worker, ignored -> new ArrayList<>())
            .add(new CompactTask(currentGroup, getOutputPath(outputDir, nextOutput)));
    }

    Set<Pair<WorkerInfo, ArrayList<CompactTask>>> result = Sets.newHashSet();
    for (Map.Entry<WorkerInfo, ArrayList<CompactTask>> entry : tasksByWorker.entrySet()) {
        for (List<CompactTask> batch : CommonUtils.partition(entry.getValue(), TASKS_PER_WORKER)) {
            if (batch.isEmpty()) {
                continue;
            }
            result.add(new Pair<>(entry.getKey(), Lists.newArrayList(batch)));
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) WorkerInfo(alluxio.wire.WorkerInfo) URIStatus(alluxio.client.file.URIStatus) List(java.util.List) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map) AlluxioURI(alluxio.AlluxioURI) Pair(alluxio.collections.Pair)

Example 27 with WorkerInfo

use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.

the class LoadDefinition method selectExecutors.

/**
 * Chooses, for every block of the configured file, the block workers that should load an
 * additional replica, and maps each of them to the job worker running on the same host.
 *
 * @param config the load configuration (file path, replication, worker and locality filters)
 * @param jobWorkerInfoList the available job workers
 * @param context gives access to the cached block workers and the file system
 * @return pairs of a job worker and one batch of its load tasks, the batches being produced
 *         by {@code CommonUtils.partition} with {@code JOBS_PER_WORKER}
 * @throws FailedPreconditionException if a block needs more replicas than there are eligible
 *         workers that do not hold it yet
 * @throws Exception if the file status cannot be fetched
 */
@Override
public Set<Pair<WorkerInfo, ArrayList<LoadTask>>> selectExecutors(LoadConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) throws Exception {
    // NOTE(review): Collectors.toMap without a merge function throws IllegalStateException on
    // duplicate keys, so two job workers reporting the same host would fail here - confirm
    // that this cannot happen in supported deployments.
    Map<String, WorkerInfo> jobWorkersByAddress = jobWorkerInfoList.stream().collect(Collectors.toMap(info -> info.getAddress().getHost(), info -> info));
    // Filter out workers which have no local job worker available.
    List<String> missingJobWorkerHosts = new ArrayList<>();
    List<BlockWorkerInfo> workers = new ArrayList<>();
    for (BlockWorkerInfo worker : context.getFsContext().getCachedWorkers()) {
        String host = worker.getNetAddress().getHost();
        if (jobWorkersByAddress.containsKey(host)) {
            // The worker and locality sets are compared against upper-cased values.
            String workerHost = host.toUpperCase();
            if (!isEmptySet(config.getExcludedWorkerSet()) && config.getExcludedWorkerSet().contains(workerHost)) {
                continue;
            }
            // If specified the locality id, the candidate worker must match one at least
            boolean match = false;
            if (worker.getNetAddress().getTieredIdentity().getTiers() != null) {
                if (!(isEmptySet(config.getLocalityIds()) && isEmptySet(config.getExcludedLocalityIds()))) {
                    boolean exclude = false;
                    // Tiers are checked in order; the first tier that is excluded or matched decides.
                    for (LocalityTier tier : worker.getNetAddress().getTieredIdentity().getTiers()) {
                        if (!isEmptySet(config.getExcludedLocalityIds()) && config.getExcludedLocalityIds().contains(tier.getValue().toUpperCase())) {
                            exclude = true;
                            break;
                        }
                        if (!isEmptySet(config.getLocalityIds()) && config.getLocalityIds().contains(tier.getValue().toUpperCase())) {
                            match = true;
                            break;
                        }
                    }
                    if (exclude) {
                        continue;
                    }
                }
            }
            // Or user specified neither worker-set nor locality id
            if ((isEmptySet(config.getWorkerSet()) && isEmptySet(config.getLocalityIds())) || match || (!isEmptySet(config.getWorkerSet()) && config.getWorkerSet().contains(workerHost))) {
                workers.add(worker);
            }
        } else {
            LOG.warn("Worker on host {} has no local job worker", host);
            missingJobWorkerHosts.add(host);
        }
    }
    // Mapping from worker to block ids which that worker is supposed to load.
    Multimap<WorkerInfo, LoadTask> assignments = LinkedListMultimap.create();
    AlluxioURI uri = new AlluxioURI(config.getFilePath());
    for (FileBlockInfo blockInfo : context.getFileSystem().getStatus(uri).getFileBlockInfos()) {
        List<BlockWorkerInfo> workersWithoutBlock = getWorkersWithoutBlock(workers, blockInfo);
        int neededReplicas = config.getReplication() - blockInfo.getBlockInfo().getLocations().size();
        if (workersWithoutBlock.size() < neededReplicas) {
            String missingJobWorkersMessage = "";
            if (!missingJobWorkerHosts.isEmpty()) {
                missingJobWorkersMessage = ". The following workers could not be used because they have " + "no local job workers: " + missingJobWorkerHosts;
            }
            // The host list is passed as a format argument rather than concatenated into the
            // format string: a host containing '%' (e.g. an IPv6 zone id) would otherwise be
            // parsed as a format specifier and make String.format itself throw.
            throw new FailedPreconditionException(String.format("Failed to find enough block workers to replicate to. Needed %s but only found %s. " + "Available workers without the block: %s%s", neededReplicas, workersWithoutBlock.size(), workersWithoutBlock, missingJobWorkersMessage));
        }
        // Randomize which of the eligible workers receive the new replicas.
        Collections.shuffle(workersWithoutBlock);
        for (int i = 0; i < neededReplicas; i++) {
            String address = workersWithoutBlock.get(i).getNetAddress().getHost();
            WorkerInfo jobWorker = jobWorkersByAddress.get(address);
            assignments.put(jobWorker, new LoadTask(blockInfo.getBlockInfo().getBlockId(), workersWithoutBlock.get(i).getNetAddress()));
        }
    }
    Set<Pair<WorkerInfo, ArrayList<LoadTask>>> result = Sets.newHashSet();
    for (Map.Entry<WorkerInfo, Collection<LoadTask>> assignment : assignments.asMap().entrySet()) {
        Collection<LoadTask> loadTasks = assignment.getValue();
        List<List<LoadTask>> partitionedTasks = CommonUtils.partition(Lists.newArrayList(loadTasks), JOBS_PER_WORKER);
        for (List<LoadTask> tasks : partitionedTasks) {
            if (!tasks.isEmpty()) {
                result.add(new Pair<>(assignment.getKey(), Lists.newArrayList(tasks)));
            }
        }
    }
    return result;
}
Also used : FailedPreconditionException(alluxio.exception.status.FailedPreconditionException) WorkerNetAddress(alluxio.wire.WorkerNetAddress) LoggerFactory(org.slf4j.LoggerFactory) BlockWorkerInfo(alluxio.client.block.BlockWorkerInfo) FileBlockInfo(alluxio.wire.FileBlockInfo) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) AbstractVoidPlanDefinition(alluxio.job.plan.AbstractVoidPlanDefinition) JobUtils(alluxio.job.util.JobUtils) SerializableVoid(alluxio.job.util.SerializableVoid) Lists(com.google.common.collect.Lists) Constants(alluxio.Constants) RunTaskContext(alluxio.job.RunTaskContext) WorkerInfo(alluxio.wire.WorkerInfo) AlluxioURI(alluxio.AlluxioURI) Map(java.util.Map) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) LocalityTier(alluxio.wire.TieredIdentity.LocalityTier) Logger(org.slf4j.Logger) Collection(java.util.Collection) MoreObjects(com.google.common.base.MoreObjects) Set(java.util.Set) SelectExecutorsContext(alluxio.job.SelectExecutorsContext) Pair(alluxio.collections.Pair) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Serializable(java.io.Serializable) LoadTask(alluxio.job.plan.load.LoadDefinition.LoadTask) BlockLocation(alluxio.wire.BlockLocation) URIStatus(alluxio.client.file.URIStatus) List(java.util.List) Collections(java.util.Collections) CommonUtils(alluxio.util.CommonUtils) NotThreadSafe(javax.annotation.concurrent.NotThreadSafe)

Example 28 with WorkerInfo

use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.

the class PersistDefinition method selectExecutors.

/**
 * Picks the single job worker that should persist the configured file: the job worker on the
 * same host as the block worker returned by {@code JobUtils.getWorkerWithMostBlocks}, or a
 * randomly chosen job worker when there is no such host match.
 *
 * @param config the persist configuration holding the file path
 * @param jobWorkerInfoList the available job workers; must not be empty
 * @param context gives access to the cached block workers and the file system
 * @return a set containing exactly one pair of the chosen job worker and {@code null}
 * @throws RuntimeException if {@code jobWorkerInfoList} is empty
 * @throws Exception if the file status cannot be fetched
 */
@Override
public Set<Pair<WorkerInfo, SerializableVoid>> selectExecutors(PersistConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) throws Exception {
    if (jobWorkerInfoList.isEmpty()) {
        throw new RuntimeException("No worker is available");
    }
    AlluxioURI uri = new AlluxioURI(config.getFilePath());
    List<BlockWorkerInfo> blockWorkers = context.getFsContext().getCachedWorkers();
    BlockWorkerInfo preferred = JobUtils.getWorkerWithMostBlocks(blockWorkers, context.getFileSystem().getStatus(uri).getFileBlockInfos());

    Set<Pair<WorkerInfo, SerializableVoid>> result = Sets.newHashSet();
    if (preferred != null) {
        // Map the preferred block worker to the job worker on the same host, if any.
        String preferredHost = preferred.getNetAddress().getHost();
        for (WorkerInfo candidate : jobWorkerInfoList) {
            if (candidate.getAddress().getHost().equals(preferredHost)) {
                result.add(new Pair<>(candidate, null));
                return result;
            }
        }
    }
    // No host match: fall back to a randomly chosen job worker.
    int randomIndex = new Random().nextInt(jobWorkerInfoList.size());
    result.add(new Pair<>(jobWorkerInfoList.get(randomIndex), null));
    return result;
}
Also used : Random(java.util.Random) BlockWorkerInfo(alluxio.client.block.BlockWorkerInfo) WorkerInfo(alluxio.wire.WorkerInfo) AlluxioURI(alluxio.AlluxioURI) Pair(alluxio.collections.Pair)

Example 29 with WorkerInfo

use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.

the class MoveDefinition method selectExecutors.

/**
 * Selects one job worker running on the host named by the move configuration.
 *
 * <p>The candidates are visited in random order, so when several job workers report the
 * configured host, which one is returned varies between calls.
 *
 * @param config the move configuration holding the target worker host
 * @param jobWorkerInfoList the available job workers; must not be empty and is not modified
 * @param context the executor selection context (unused here)
 * @return a set with one pair of the matching job worker and {@code null}, or an empty set
 *         when no job worker reports the configured host
 * @throws IllegalArgumentException if {@code jobWorkerInfoList} is empty
 */
@Override
public Set<Pair<WorkerInfo, SerializableVoid>> selectExecutors(MoveConfig config, List<WorkerInfo> jobWorkerInfoList, SelectExecutorsContext context) {
    Preconditions.checkArgument(!jobWorkerInfoList.isEmpty(), "No worker is available");
    String workerHost = config.getWorkerHost();
    Set<Pair<WorkerInfo, SerializableVoid>> result = Sets.newHashSet();
    // Shuffle a private copy: shuffling the argument itself would reorder the caller's list
    // and would throw UnsupportedOperationException for an unmodifiable list.
    List<WorkerInfo> candidates = new java.util.ArrayList<>(jobWorkerInfoList);
    Collections.shuffle(candidates);
    for (WorkerInfo workerInfo : candidates) {
        // Select job workers that have this block locally to move
        if (workerHost.equals(workerInfo.getAddress().getHost())) {
            result.add(new Pair<>(workerInfo, null));
            return result;
        }
    }
    return result;
}
Also used : BlockWorkerInfo(alluxio.client.block.BlockWorkerInfo) WorkerInfo(alluxio.wire.WorkerInfo) Pair(alluxio.collections.Pair)

Example 30 with WorkerInfo

use of alluxio.wire.WorkerInfo in project alluxio by Alluxio.

the class BlockMasterRegisterStreamIntegrationTest method verifyWorkerWritable.

/**
 * Verifies the worker is writable by committing a new block to it and then checking that
 * the block is reported on that worker.
 *
 * @param workerId the id of the worker to check
 * @throws Exception if any block master call or the verification fails
 */
private void verifyWorkerWritable(long workerId) throws Exception {
    final long blockId = 999L;
    final long blockSize = 1024L;
    // Read the worker's usage before the commit, so the commit can report it plus the new block.
    long usedBefore = mBlockMaster.getWorker(workerId).getUsedBytes();
    mBlockMaster.commitBlock(workerId, usedBefore + blockSize, "MEM", "MEM", blockId, blockSize);
    // Look the worker up again after the commit and check the block is located on it.
    WorkerInfo committedOn = BlockMasterTestUtils.findWorkerInfo(mBlockMaster.getWorkerInfoList(), workerId);
    BlockMasterTestUtils.verifyBlockOnWorkers(mBlockMaster, blockId, blockSize, ImmutableList.of(committedOn));
}
Also used : MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) WorkerInfo(alluxio.wire.WorkerInfo)

Aggregations

WorkerInfo (alluxio.wire.WorkerInfo)66 Test (org.junit.Test)31 ArrayList (java.util.ArrayList)18 Pair (alluxio.collections.Pair)17 BlockMasterTestUtils.findWorkerInfo (alluxio.master.block.BlockMasterTestUtils.findWorkerInfo)14 CountDownLatch (java.util.concurrent.CountDownLatch)14 SelectExecutorsContext (alluxio.job.SelectExecutorsContext)12 BlockWorkerInfo (alluxio.client.block.BlockWorkerInfo)11 Command (alluxio.grpc.Command)11 AlluxioURI (alluxio.AlluxioURI)9 Map (java.util.Map)9 BlockInfo (alluxio.wire.BlockInfo)8 BlockLocation (alluxio.wire.BlockLocation)8 WorkerNetAddress (alluxio.wire.WorkerNetAddress)8 URIStatus (alluxio.client.file.URIStatus)7 HashMap (java.util.HashMap)7 List (java.util.List)7 HashSet (java.util.HashSet)6 UnavailableException (alluxio.exception.status.UnavailableException)5 FileBlockInfo (alluxio.wire.FileBlockInfo)5