use of org.apache.drill.exec.store.hbase.HBaseSubScan.HBaseSubScanSpec in project drill by apache.
the class HBaseGroupScan method applyAssignments.
/**
*
* @param incomingEndpoints
*/
@Override
public void applyAssignments(List<DrillbitEndpoint> incomingEndpoints) {
watch.reset();
watch.start();
final int numSlots = incomingEndpoints.size();
Preconditions.checkArgument(numSlots <= regionsToScan.size(), String.format("Incoming endpoints %d is greater than number of scan regions %d", numSlots, regionsToScan.size()));
/*
* Minimum/Maximum number of assignment per slot
*/
final int minPerEndpointSlot = (int) Math.floor((double) regionsToScan.size() / numSlots);
final int maxPerEndpointSlot = (int) Math.ceil((double) regionsToScan.size() / numSlots);
/*
* initialize (endpoint index => HBaseSubScanSpec list) map
*/
endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
/*
* another map with endpoint (hostname => corresponding index list) in 'incomingEndpoints' list
*/
Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
/*
* Initialize these two maps
*/
for (int i = 0; i < numSlots; ++i) {
endpointFragmentMapping.put(i, new ArrayList<HBaseSubScanSpec>(maxPerEndpointSlot));
String hostname = incomingEndpoints.get(i).getAddress();
Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
if (hostIndexQueue == null) {
hostIndexQueue = Lists.newLinkedList();
endpointHostIndexListMap.put(hostname, hostIndexQueue);
}
hostIndexQueue.add(i);
}
Set<Entry<HRegionInfo, ServerName>> regionsToAssignSet = Sets.newHashSet(regionsToScan.entrySet());
/*
* First, we assign regions which are hosted on region servers running on drillbit endpoints
*/
for (Iterator<Entry<HRegionInfo, ServerName>> regionsIterator = regionsToAssignSet.iterator(); regionsIterator.hasNext(); ) /*nothing*/
{
Entry<HRegionInfo, ServerName> regionEntry = regionsIterator.next();
/*
* Test if there is a drillbit endpoint which is also an HBase RegionServer that hosts the current HBase region
*/
Queue<Integer> endpointIndexlist = endpointHostIndexListMap.get(regionEntry.getValue().getHostname());
if (endpointIndexlist != null) {
Integer slotIndex = endpointIndexlist.poll();
List<HBaseSubScanSpec> endpointSlotScanList = endpointFragmentMapping.get(slotIndex);
endpointSlotScanList.add(regionInfoToSubScanSpec(regionEntry.getKey()));
// add to the tail of the slot list, to add more later in round robin fashion
endpointIndexlist.offer(slotIndex);
// this region has been assigned
regionsIterator.remove();
}
}
/*
* Build priority queues of slots, with ones which has tasks lesser than 'minPerEndpointSlot' and another which have more.
*/
PriorityQueue<List<HBaseSubScanSpec>> minHeap = new PriorityQueue<List<HBaseSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR);
PriorityQueue<List<HBaseSubScanSpec>> maxHeap = new PriorityQueue<List<HBaseSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR_REV);
for (List<HBaseSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
if (listOfScan.size() < minPerEndpointSlot) {
minHeap.offer(listOfScan);
} else if (listOfScan.size() > minPerEndpointSlot) {
maxHeap.offer(listOfScan);
}
}
/*
* Now, let's process any regions which remain unassigned and assign them to slots with minimum number of assignments.
*/
if (regionsToAssignSet.size() > 0) {
for (Entry<HRegionInfo, ServerName> regionEntry : regionsToAssignSet) {
List<HBaseSubScanSpec> smallestList = minHeap.poll();
smallestList.add(regionInfoToSubScanSpec(regionEntry.getKey()));
if (smallestList.size() < maxPerEndpointSlot) {
minHeap.offer(smallestList);
}
}
}
/*
* While there are slots with lesser than 'minPerEndpointSlot' unit work, balance from those with more.
*/
while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
List<HBaseSubScanSpec> smallestList = minHeap.poll();
List<HBaseSubScanSpec> largestList = maxHeap.poll();
smallestList.add(largestList.remove(largestList.size() - 1));
if (largestList.size() > minPerEndpointSlot) {
maxHeap.offer(largestList);
}
if (smallestList.size() < minPerEndpointSlot) {
minHeap.offer(smallestList);
}
}
/* no slot should be empty at this point */
assert (minHeap.peek() == null || minHeap.peek().size() > 0) : String.format("Unable to assign tasks to some endpoints.\nEndpoints: {}.\nAssignment Map: {}.", incomingEndpoints, endpointFragmentMapping.toString());
logger.debug("Built assignment map in {} µs.\nEndpoints: {}.\nAssignment Map: {}", watch.elapsed(TimeUnit.NANOSECONDS) / 1000, incomingEndpoints, endpointFragmentMapping.toString());
}
Aggregations