Search in sources :

Example 1 with INcCollection

use of org.apache.hyracks.hdfs.api.INcCollection in project asterixdb by apache.

the class RackAwareNcCollectionBuilder method build.

/**
 * Builds an {@link INcCollection} that selects node controllers (NCs) by their position
 * in the cluster topology.
 *
 * <p>Each NC is resolved to a topology path through {@code topology.lookupNetworkTerminal};
 * an NC for which no path is produced is filed under the sentinel path
 * {@code [Integer.MIN_VALUE]}. Two lookup structures are then built: path -> NC names, and a
 * sorted map path -> number of free slots (the sum of {@code slotLimit - workloads[i]} over
 * every NC on that path whose workload is below {@code slotLimit}).
 *
 * @param ncNameToNcInfos NC name -> NC info, used to resolve each NC's IP address
 * @param ipToNcMapping   not read by this implementation
 * @param ncNameToIndex   NC name -> index into {@code workloads}
 * @param NCs             NC names; index {@code i} is paired with {@code workloads[i]}
 * @param workloads       current number of assignments per NC; read lazily by the returned
 *                        collection, so later updates by the caller are observed
 * @param slotLimit       maximum number of assignments per NC
 * @return a collection that hands out the nearest NC with a free slot for a split
 * @throws IllegalStateException wrapping any exception raised while resolving addresses or paths
 */
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) {
    try {
        // Group NC names by the topology path of the machine they run on.
        final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>();
        for (int i = 0; i < NCs.length; i++) {
            List<Integer> path = new ArrayList<Integer>();
            String ipAddress = InetAddress.getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()).getHostAddress();
            topology.lookupNetworkTerminal(ipAddress, path);
            if (path.size() <= 0) {
                // if the hyracks nc is not in the defined cluster
                path.add(Integer.MIN_VALUE);
                LOGGER.info(NCs[i] + "'s IP address is not in the cluster toplogy file!");
            }
            List<String> ncs = pathToNCs.get(path);
            if (ncs == null) {
                ncs = new ArrayList<String>();
                pathToNCs.put(path, ncs);
            }
            ncs.add(NCs[i]);
        }
        // Paths are ordered element by element; when one path is a prefix of the other,
        // the shorter path sorts first.
        final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>(new Comparator<List<Integer>>() {

            @Override
            public int compare(List<Integer> l1, List<Integer> l2) {
                int commonLength = Math.min(l1.size(), l2.size());
                for (int i = 0; i < commonLength; i++) {
                    Integer value1 = l1.get(i);
                    Integer value2 = l2.get(i);
                    int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0);
                    if (cmp != 0) {
                        return cmp;
                    }
                }
                return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
            }
        });
        // Accumulate the free slots of every under-loaded NC onto its topology path.
        for (int i = 0; i < workloads.length; i++) {
            if (workloads[i] < slotLimit) {
                List<Integer> path = new ArrayList<Integer>();
                String ipAddress = InetAddress.getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()).getHostAddress();
                topology.lookupNetworkTerminal(ipAddress, path);
                if (path.size() <= 0) {
                    // if the hyracks nc is not in the defined cluster
                    path.add(Integer.MIN_VALUE);
                }
                IntWritable availableSlot = availableIpsToSlots.get(path);
                if (availableSlot == null) {
                    availableSlot = new IntWritable(slotLimit - workloads[i]);
                    availableIpsToSlots.put(path, availableSlot);
                } else {
                    availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
                }
            }
        }
        return new INcCollection() {

            /**
             * Picks an NC with a free slot that is closest (by topology path) to one of the
             * split's locations, consumes one slot on the chosen path, and returns the NC name.
             *
             * @return the chosen NC name, or {@code null} if the split could not be scheduled
             */
            @Override
            public String findNearestAvailableSlot(InputSplit split) {
                try {
                    String[] locs = split.getLocations();
                    int minDistance = Integer.MAX_VALUE;
                    List<Integer> currentCandidatePath = null;
                    // Fix: a null location array must take the "no location info" branch below;
                    // the previous "locs == null || ..." test dereferenced the null array.
                    if (locs != null && locs.length > 0) {
                        for (int j = 0; j < locs.length; j++) {
                            // get all the IP addresses from the name
                            InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                            boolean inTopology = false;
                            for (InetAddress ip : allIps) {
                                List<Integer> splitPath = new ArrayList<Integer>();
                                boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath);
                                if (!inCluster) {
                                    continue;
                                }
                                inTopology = true;
                                // Nearest key at or below the split's path, else the nearest one above it.
                                List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
                                if (candidatePath == null) {
                                    candidatePath = availableIpsToSlots.ceilingKey(splitPath);
                                }
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        int distance = distance(splitPath, candidatePath);
                                        if (minDistance > distance) {
                                            minDistance = distance;
                                            currentCandidatePath = candidatePath;
                                        }
                                    }
                                }
                            }
                            if (!inTopology) {
                                LOGGER.info(locs[j] + "'s IP address is not in the cluster toplogy file!");
                                // The machine is not in the topology file: take the first path
                                // (in sorted order) that still has a free slot.
                                List<Integer> candidatePath = null;
                                for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                                    if (entry.getValue().get() > 0) {
                                        candidatePath = entry.getKey();
                                        break;
                                    }
                                }
                                // the split path is empty
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        currentCandidatePath = candidatePath;
                                    }
                                }
                            }
                        }
                    } else {
                        // No location information at all: take the first path with a free slot.
                        for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                            if (entry.getValue().get() > 0) {
                                currentCandidatePath = entry.getKey();
                                break;
                            }
                        }
                    }
                    if (currentCandidatePath != null && currentCandidatePath.size() > 0) {
                        // Update the entry of the selected path; exhausted paths are dropped.
                        IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
                        availableSlot.set(availableSlot.get() - 1);
                        if (availableSlot.get() == 0) {
                            availableIpsToSlots.remove(currentCandidatePath);
                        }
                        // Return the first NC on that path that is still below the slot limit.
                        List<String> candidateNcs = pathToNCs.get(currentCandidatePath);
                        for (String candidate : candidateNcs) {
                            int ncIndex = ncNameToIndex.get(candidate);
                            if (workloads[ncIndex] < slotLimit) {
                                return candidate;
                            }
                        }
                    }
                    // not scheduled
                    return null;
                } catch (Exception e) {
                    throw new IllegalStateException(e);
                }
            }

            /**
             * @return the number of topology paths that still have at least one free slot
             *         (the size of the path map, not the total slot count)
             */
            @Override
            public int numAvailableSlots() {
                return availableIpsToSlots.size();
            }

            /**
             * Folds two paths into one number: each level contributes the absolute difference
             * of its elements (or the absolute value of the unmatched element of the longer
             * path), with earlier levels weighted by successive factors of 100.
             */
            private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
                int commonLength = Math.min(splitPath.size(), candidatePath.size());
                int distance = 0;
                for (int i = 0; i < commonLength; i++) {
                    distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i));
                }
                List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath;
                for (int i = commonLength; i < restElements.size(); i++) {
                    distance = distance * 100 + Math.abs(restElements.get(i));
                }
                return distance;
            }
        };
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) INcCollection(org.apache.hyracks.hdfs.api.INcCollection) TreeMap(java.util.TreeMap) ArrayList(java.util.ArrayList) List(java.util.List) InputSplit(org.apache.hadoop.mapred.InputSplit) InetAddress(java.net.InetAddress) IntWritable(org.apache.hadoop.io.IntWritable)

Example 2 with INcCollection

use of org.apache.hyracks.hdfs.api.INcCollection in project asterixdb by apache.

the class IPProximityNcCollectionBuilder method build.

/**
 * Builds an {@link INcCollection} that selects node controllers (NCs) by numeric proximity
 * of their IP address to a split's location.
 *
 * <p>Every NC whose workload is below {@code slotLimit} contributes
 * {@code slotLimit - workloads[i]} free slots to the entry for its raw IP address in a
 * sorted map; NCs sharing an IP address share one entry.
 *
 * @param ncNameToNcInfos NC name -> NC info, used to resolve each NC's IP address
 * @param ipToNcMapping   host address string -> names of the NCs on that address
 * @param ncNameToIndex   NC name -> index into {@code workloads}
 * @param NCs             NC names; index {@code i} is paired with {@code workloads[i]}
 * @param workloads       current number of assignments per NC; read lazily by the returned
 *                        collection, so later updates by the caller are observed
 * @param slotLimit       maximum number of assignments per NC
 * @return a collection that hands out the nearest NC with a free slot for a split
 * @throws RuntimeException wrapping an {@link UnknownHostException} from the address lookup
 */
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) {
    final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>();
    for (int i = 0; i < workloads.length; i++) {
        if (workloads[i] < slotLimit) {
            byte[] rawip;
            try {
                rawip = ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress();
            } catch (UnknownHostException e) {
                // QQQ Should probably have a neater solution than this
                throw new RuntimeException(e);
            }
            BytesWritable ip = new BytesWritable(rawip);
            IntWritable availableSlot = availableIpsToSlots.get(ip);
            if (availableSlot == null) {
                availableSlot = new IntWritable(slotLimit - workloads[i]);
                availableIpsToSlots.put(ip, availableSlot);
            } else {
                availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
            }
        }
    }
    return new INcCollection() {

        /**
         * Picks the NC address with a free slot that is numerically closest to one of the
         * split's locations, consumes one slot on it, and returns an NC on that address.
         *
         * @return the chosen NC name, or {@code null} if the split could not be scheduled
         */
        @Override
        public String findNearestAvailableSlot(InputSplit split) {
            try {
                String[] locs = split.getLocations();
                // Distances are held in a long: two unsigned 32-bit values can differ by
                // more than Integer.MAX_VALUE.
                long minDistance = Long.MAX_VALUE;
                BytesWritable currentCandidateIp = null;
                // Fix: a null location array must take the "no location info" branch below;
                // the previous "locs == null || ..." test dereferenced the null array.
                if (locs != null && locs.length > 0) {
                    for (int j = 0; j < locs.length; j++) {
                        // get all the IP addresses from the name
                        InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                        for (InetAddress ip : allIps) {
                            BytesWritable splitIp = new BytesWritable(ip.getAddress());
                            // Nearest key at or below the split's address, else the nearest one above it.
                            BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp);
                            if (candidateNcIp == null) {
                                candidateNcIp = availableIpsToSlots.ceilingKey(splitIp);
                            }
                            if (candidateNcIp != null) {
                                if (availableIpsToSlots.get(candidateNcIp).get() > 0) {
                                    byte[] candidateIP = candidateNcIp.getBytes();
                                    byte[] splitIP = splitIp.getBytes();
                                    // Fix: combine the first four bytes as an unsigned value.
                                    // The former signed-int subtraction could overflow (and
                                    // Math.abs(Integer.MIN_VALUE) is negative), which ranked
                                    // far-apart addresses as the closest ones.
                                    long candidateValue = ((candidateIP[0] & 0xFFL) << 24) | ((candidateIP[1] & 0xFFL) << 16) | ((candidateIP[2] & 0xFFL) << 8) | (candidateIP[3] & 0xFFL);
                                    long splitValue = ((splitIP[0] & 0xFFL) << 24) | ((splitIP[1] & 0xFFL) << 16) | ((splitIP[2] & 0xFFL) << 8) | (splitIP[3] & 0xFFL);
                                    long distance = Math.abs(candidateValue - splitValue);
                                    if (minDistance > distance) {
                                        minDistance = distance;
                                        currentCandidateIp = candidateNcIp;
                                    }
                                }
                            }
                        }
                    }
                } else {
                    // No location information at all: take the first address with a free slot.
                    for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) {
                        if (entry.getValue().get() > 0) {
                            currentCandidateIp = entry.getKey();
                            break;
                        }
                    }
                }
                if (currentCandidateIp != null) {
                    // Update the entry of the selected IP; exhausted addresses are dropped.
                    IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp);
                    availableSlot.set(availableSlot.get() - 1);
                    if (availableSlot.get() == 0) {
                        availableIpsToSlots.remove(currentCandidateIp);
                    }
                    // Return the first NC on that address that is still below the slot limit.
                    List<String> dataLocations = ipToNcMapping.get(InetAddress.getByAddress(currentCandidateIp.getBytes()).getHostAddress());
                    for (String nc : dataLocations) {
                        int ncIndex = ncNameToIndex.get(nc);
                        if (workloads[ncIndex] < slotLimit) {
                            return nc;
                        }
                    }
                }
                // not scheduled
                return null;
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }

        /**
         * @return the number of distinct IP addresses that still have at least one free slot
         *         (the size of the address map, not the total slot count)
         */
        @Override
        public int numAvailableSlots() {
            return availableIpsToSlots.size();
        }
    };
}
Also used : UnknownHostException(java.net.UnknownHostException) INcCollection(org.apache.hyracks.hdfs.api.INcCollection) BytesWritable(org.apache.hadoop.io.BytesWritable) TreeMap(java.util.TreeMap) UnknownHostException(java.net.UnknownHostException) InputSplit(org.apache.hadoop.mapred.InputSplit) InetAddress(java.net.InetAddress) IntWritable(org.apache.hadoop.io.IntWritable)

Example 3 with INcCollection

use of org.apache.hyracks.hdfs.api.INcCollection in project asterixdb by apache.

the class Scheduler method scheduleNonLocalSlots.

/**
 * Places every split that has not been scheduled yet on a node controller chosen by the
 * configured NC collection builder.
 *
 * @param splits
 *            The HDFS file splits.
 * @param workloads
 *            The current number of assignments of each machine; incremented for every
 *            split placed here.
 * @param locations
 *            The result schedule; receives the chosen NC name for each split placed here.
 * @param slotLimit
 *            The maximum slots of each machine.
 * @param scheduled
 *            Indicates which splits are already scheduled; set to true for every split
 *            placed here.
 */
private void scheduleNonLocalSlots(InputSplit[] splits, int[] workloads, String[] locations, int slotLimit, boolean[] scheduled) throws IOException, UnknownHostException {
    // Ask the builder for a view of the NCs that can still take work.
    INcCollection candidates = this.ncCollectionBuilder.build(ncNameToNcInfos, ipToNcMapping, ncNameToIndex, NCs, workloads, slotLimit);
    if (candidates.numAvailableSlots() == 0) {
        // Nothing can take more work, so the remaining splits stay unscheduled.
        return;
    }
    for (int splitIdx = 0; splitIdx < splits.length; splitIdx++) {
        if (scheduled[splitIdx]) {
            continue;
        }
        // The split is not placed yet: ask for the nearest NC that still has a slot.
        String chosenNc = candidates.findNearestAvailableSlot(splits[splitIdx]);
        if (chosenNc == null) {
            // No slot could be found for this split; leave it unscheduled.
            continue;
        }
        int chosenIdx = ncNameToIndex.get(chosenNc);
        workloads[chosenIdx]++;
        scheduled[splitIdx] = true;
        locations[splitIdx] = chosenNc;
    }
}
Also used : INcCollection(org.apache.hyracks.hdfs.api.INcCollection) InputSplit(org.apache.hadoop.mapred.InputSplit)

Aggregations

InputSplit (org.apache.hadoop.mapred.InputSplit)3 INcCollection (org.apache.hyracks.hdfs.api.INcCollection)3 InetAddress (java.net.InetAddress)2 TreeMap (java.util.TreeMap)2 IntWritable (org.apache.hadoop.io.IntWritable)2 UnknownHostException (java.net.UnknownHostException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1