Search in sources :

Example 1 with HBaseSubScanSpec

Use of org.apache.drill.exec.store.hbase.HBaseSubScan.HBaseSubScanSpec in the Apache Drill project.

The method applyAssignments of the class HBaseGroupScan.

/**
 * Distributes the regions in {@code regionsToScan} over the given endpoint slots and stores the result
 * in {@code endpointFragmentMapping} (slot index =&gt; list of {@link HBaseSubScanSpec}).
 *
 * <p>The assignment proceeds in three passes:
 * <ol>
 *   <li>regions whose region server hostname equals the address of one or more incoming endpoints are
 *       handed to those endpoints' slots in round-robin order;</li>
 *   <li>regions still unassigned are given, one at a time, to the slot that currently holds the fewest
 *       regions;</li>
 *   <li>slots holding fewer than {@code floor(regions / slots)} regions take regions from the slots
 *       holding the most.</li>
 * </ol>
 *
 * @param incomingEndpoints the endpoints to assign work to, one slot per list element; must not contain
 *                          more elements than there are regions to scan
 * @throws IllegalArgumentException if there are more endpoints than regions to scan
 */
@Override
public void applyAssignments(List<DrillbitEndpoint> incomingEndpoints) {
    watch.reset();
    watch.start();
    final int numSlots = incomingEndpoints.size();
    Preconditions.checkArgument(numSlots <= regionsToScan.size(), String.format("Incoming endpoints %d is greater than number of scan regions %d", numSlots, regionsToScan.size()));
    /*
     * Minimum/Maximum number of assignment per slot
     */
    final int minPerEndpointSlot = (int) Math.floor((double) regionsToScan.size() / numSlots);
    final int maxPerEndpointSlot = (int) Math.ceil((double) regionsToScan.size() / numSlots);
    /*
     * initialize (endpoint index => HBaseSubScanSpec list) map
     */
    endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
    /*
     * another map with endpoint (hostname => corresponding index list) in 'incomingEndpoints' list
     */
    Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
    /*
     * Initialize these two maps
     */
    for (int i = 0; i < numSlots; ++i) {
        endpointFragmentMapping.put(i, new ArrayList<HBaseSubScanSpec>(maxPerEndpointSlot));
        String hostname = incomingEndpoints.get(i).getAddress();
        Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
        if (hostIndexQueue == null) {
            hostIndexQueue = Lists.newLinkedList();
            endpointHostIndexListMap.put(hostname, hostIndexQueue);
        }
        hostIndexQueue.add(i);
    }
    // Work on a copy so that assigned regions can be removed without touching 'regionsToScan'.
    Set<Entry<HRegionInfo, ServerName>> regionsToAssignSet = Sets.newHashSet(regionsToScan.entrySet());
    /*
     * First, we assign regions which are hosted on region servers running on drillbit endpoints
     */
    for (Iterator<Entry<HRegionInfo, ServerName>> regionsIterator = regionsToAssignSet.iterator(); regionsIterator.hasNext(); ) {
        Entry<HRegionInfo, ServerName> regionEntry = regionsIterator.next();
        /*
         * Test if there is a drillbit endpoint which is also an HBase RegionServer that hosts the current HBase region
         */
        Queue<Integer> endpointIndexlist = endpointHostIndexListMap.get(regionEntry.getValue().getHostname());
        if (endpointIndexlist != null) {
            // A queue in this map always holds at least one index, so poll() cannot return null here.
            Integer slotIndex = endpointIndexlist.poll();
            List<HBaseSubScanSpec> endpointSlotScanList = endpointFragmentMapping.get(slotIndex);
            endpointSlotScanList.add(regionInfoToSubScanSpec(regionEntry.getKey()));
            // add to the tail of the slot list, to add more later in round robin fashion
            endpointIndexlist.offer(slotIndex);
            // this region has been assigned
            regionsIterator.remove();
        }
    }
    /*
     * Build priority queues of slots: one with the slots holding at most 'minPerEndpointSlot' tasks and another
     * with the slots holding more.
     *
     * Slots holding exactly 'minPerEndpointSlot' tasks must be in the min heap: when the region count is not a
     * multiple of the slot count they can still take one more region. If they were left out, the min heap could
     * be empty while regions remain unassigned and the poll() below would return null.
     */
    PriorityQueue<List<HBaseSubScanSpec>> minHeap = new PriorityQueue<List<HBaseSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR);
    PriorityQueue<List<HBaseSubScanSpec>> maxHeap = new PriorityQueue<List<HBaseSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR_REV);
    for (List<HBaseSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
        if (listOfScan.size() <= minPerEndpointSlot) {
            minHeap.offer(listOfScan);
        } else {
            maxHeap.offer(listOfScan);
        }
    }
    /*
     * Now, let's process any regions which remain unassigned and assign them to slots with minimum number of assignments.
     */
    for (Entry<HRegionInfo, ServerName> regionEntry : regionsToAssignSet) {
        List<HBaseSubScanSpec> smallestList = minHeap.poll();
        smallestList.add(regionInfoToSubScanSpec(regionEntry.getKey()));
        // A slot that has reached the maximum is not offered again, so it receives no further regions here.
        if (smallestList.size() < maxPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }
    /*
     * While there are slots with lesser than 'minPerEndpointSlot' unit work, balance from those with more.
     */
    while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
        List<HBaseSubScanSpec> smallestList = minHeap.poll();
        List<HBaseSubScanSpec> largestList = maxHeap.poll();
        smallestList.add(largestList.remove(largestList.size() - 1));
        if (largestList.size() > minPerEndpointSlot) {
            maxHeap.offer(largestList);
        }
        if (smallestList.size() < minPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }
    /* no slot should be empty at this point */
    // String.format needs %s conversions; '{}' is only understood by the logger and would be printed literally.
    assert (minHeap.peek() == null || minHeap.peek().size() > 0) : String.format("Unable to assign tasks to some endpoints.\nEndpoints: %s.\nAssignment Map: %s.", incomingEndpoints, endpointFragmentMapping.toString());
    logger.debug("Built assignment map in {} µs.\nEndpoints: {}.\nAssignment Map: {}", watch.elapsed(TimeUnit.NANOSECONDS) / 1000, incomingEndpoints, endpointFragmentMapping.toString());
}
Also used : PriorityQueue(java.util.PriorityQueue) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Entry(java.util.Map.Entry) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) PriorityQueue(java.util.PriorityQueue) Queue(java.util.Queue) HBaseSubScanSpec(org.apache.drill.exec.store.hbase.HBaseSubScan.HBaseSubScanSpec)

Aggregations

ArrayList (java.util.ArrayList)1 List (java.util.List)1 Entry (java.util.Map.Entry)1 PriorityQueue (java.util.PriorityQueue)1 Queue (java.util.Queue)1 DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)1 HBaseSubScanSpec (org.apache.drill.exec.store.hbase.HBaseSubScan.HBaseSubScanSpec)1 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)1 ServerName (org.apache.hadoop.hbase.ServerName)1