Use of org.apache.drill.exec.physical.EndpointAffinity in project drill by axbaretto:
the class AbstractExchange, method getDefaultAffinityMap.
/**
 * Build a default endpoint affinity map in which each Drillbit's affinity is
 * proportional to the number of its occurrences in the given endpoint list.
 * Every occurrence contributes {@code 1 / fragmentEndpoints.size()} affinity.
 *
 * @param fragmentEndpoints Drillbit endpoint assignments of fragments.
 * @return List of EndpointAffinity objects, one per distinct Drillbit endpoint
 *         appearing in <i>fragmentEndpoints</i>.
 */
protected static List<EndpointAffinity> getDefaultAffinityMap(List<DrillbitEndpoint> fragmentEndpoints) {
final double perOccurrence = 1.0d / fragmentEndpoints.size();
final Map<DrillbitEndpoint, EndpointAffinity> byEndpoint = Maps.newHashMap();
for (final DrillbitEndpoint endpoint : fragmentEndpoints) {
final EndpointAffinity existing = byEndpoint.get(endpoint);
if (existing == null) {
// First occurrence of this endpoint: start a fresh affinity entry.
byEndpoint.put(endpoint, new EndpointAffinity(endpoint, perOccurrence));
} else {
// Repeat occurrence: accumulate another share of affinity.
existing.addAffinity(perOccurrence);
}
}
return new ArrayList<>(byEndpoint.values());
}
Use of org.apache.drill.exec.physical.EndpointAffinity in project drill by axbaretto:
the class SoftAffinityFragmentParallelizer, method findEndpoints.
/**
 * Select exactly {@code width} endpoints on which to run fragments, based on the given
 * active endpoint list and affinity map. Endpoints with affinity are filled first by
 * cycling through them in descending affinity order; any remaining slots are filled
 * round-robin (with a random starting point) from the active endpoints that have no
 * affinity. The returned list may contain the same endpoint multiple times.
 *
 * @param activeEndpoints     all currently active Drillbit endpoints
 * @param endpointAffinityMap affinity per endpoint; may be empty
 * @param width               total number of fragment slots to assign
 * @param parameters          parallelization settings (affinity factor is read here)
 * @return a list of exactly {@code width} endpoints (duplicates allowed)
 * @throws PhysicalOperatorSetupException if {@code width} is smaller than the number of
 *         endpoints that have a mandatory (+infinity affinity) assignment requirement
 */
private List<DrillbitEndpoint> findEndpoints(final Collection<DrillbitEndpoint> activeEndpoints, final Map<DrillbitEndpoint, EndpointAffinity> endpointAffinityMap, final int width, final ParallelizationParameters parameters) throws PhysicalOperatorSetupException {
final List<DrillbitEndpoint> endpoints = Lists.newArrayList();
if (endpointAffinityMap.size() > 0) {
// Get EndpointAffinity list sorted in descending order of affinity values
List<EndpointAffinity> sortedAffinityList = ENDPOINT_AFFINITY_ORDERING.immutableSortedCopy(endpointAffinityMap.values());
// Find the number of mandatory nodes (nodes with +infinity affinity).
int numRequiredNodes = 0;
for (EndpointAffinity ep : sortedAffinityList) {
if (ep.isAssignmentRequired()) {
numRequiredNodes++;
} else {
// The list is sorted in descending affinity order, so the first non-mandatory
// entry means all remaining entries are non-mandatory as well.
break;
}
}
if (width < numRequiredNodes) {
throw new PhysicalOperatorSetupException("Can not parallelize the fragment as the parallelization width (" + width + ") is " + "less than the number of mandatory nodes (" + numRequiredNodes + " nodes with +INFINITE affinity).");
}
// Find the maximum number of slots which should go to endpoints with affinity (See DRILL-825 for details)
int affinedSlots = Math.max(1, (int) (Math.ceil((double) parameters.getAffinityFactor() * width / activeEndpoints.size()) * sortedAffinityList.size()));
// Make sure affined slots is at least the number of mandatory nodes
affinedSlots = Math.max(affinedSlots, numRequiredNodes);
// Cap the affined slots to max parallelization width
affinedSlots = Math.min(affinedSlots, width);
// Cycle through the sorted list so affined endpoints are assigned as evenly as
// possible, highest-affinity endpoints first on each pass.
Iterator<EndpointAffinity> affinedEPItr = Iterators.cycle(sortedAffinityList);
// Keep adding until we have selected "affinedSlots" number of endpoints.
while (endpoints.size() < affinedSlots) {
EndpointAffinity ea = affinedEPItr.next();
endpoints.add(ea.getEndpoint());
}
}
// add remaining endpoints if required
if (endpoints.size() < width) {
// Get a list of endpoints that are not part of the affinity endpoint list
List<DrillbitEndpoint> endpointsWithNoAffinity;
final Set<DrillbitEndpoint> endpointsWithAffinity = endpointAffinityMap.keySet();
if (endpointAffinityMap.size() > 0) {
endpointsWithNoAffinity = Lists.newArrayList();
for (DrillbitEndpoint ep : activeEndpoints) {
if (!endpointsWithAffinity.contains(ep)) {
endpointsWithNoAffinity.add(ep);
}
}
} else {
// Need to create a mutable copy rather than an immutable one, because the list is
// shuffled below and Collections.shuffle() doesn't accept an immutable list.
endpointsWithNoAffinity = Lists.newArrayList(activeEndpoints);
}
// round robin with random start.
Collections.shuffle(endpointsWithNoAffinity, ThreadLocalRandom.current());
// If every active endpoint already has affinity, fall back to cycling through the
// affined endpoints themselves to fill the remaining slots.
Iterator<DrillbitEndpoint> otherEPItr = Iterators.cycle(endpointsWithNoAffinity.size() > 0 ? endpointsWithNoAffinity : endpointsWithAffinity);
while (endpoints.size() < width) {
endpoints.add(otherEPItr.next());
}
}
return endpoints;
}
Use of org.apache.drill.exec.physical.EndpointAffinity in project drill by axbaretto:
the class HardAffinityFragmentParallelizer, method parallelizeFragment.
/**
 * Parallelize the given fragment wrapper under hard-affinity rules: every endpoint with
 * a mandatory assignment requirement receives at least one fragment, and remaining
 * slots are distributed proportionally to each endpoint's affinity, subject to the
 * per-node, per-fragment and global width limits in {@code parameters}.
 *
 * Note: {@code activeEndpoints} is not consulted here — only the endpoints present in
 * the fragment's affinity map with a mandatory requirement are used.
 *
 * @throws PhysicalOperatorSetupException if the mandatory endpoints cannot fit within
 *         the allowed width, or if slot assignment stops making progress
 */
@Override
public void parallelizeFragment(final Wrapper fragmentWrapper, final ParallelizationParameters parameters, final Collection<DrillbitEndpoint> activeEndpoints) throws PhysicalOperatorSetupException {
final Stats stats = fragmentWrapper.getStats();
final ParallelizationInfo pInfo = stats.getParallelizationInfo();
int totalMaxWidth = 0;
// Go through the affinity map and extract the endpoints that have mandatory assignment requirement
final Map<DrillbitEndpoint, EndpointAffinity> endpointPool = Maps.newHashMap();
for (Entry<DrillbitEndpoint, EndpointAffinity> entry : pInfo.getEndpointAffinityMap().entrySet()) {
if (entry.getValue().isAssignmentRequired()) {
endpointPool.put(entry.getKey(), entry.getValue());
// Limit the max width of the endpoint to allowed max width.
totalMaxWidth += Math.min(parameters.getMaxWidthPerNode(), entry.getValue().getMaxWidth());
if (totalMaxWidth < 0) {
// If the totalWidth overflows, just keep it at the max value.
totalMaxWidth = Integer.MAX_VALUE;
}
}
}
// Step 1: Find the width taking into account various parameters
// 1.1. Find the parallelization based on cost. Use max cost of all operators in this fragment; this is consistent
// with the calculation that ExcessiveExchangeRemover uses.
int width = (int) Math.ceil(stats.getMaxCost() / parameters.getSliceTarget());
// 1.2. Make sure the width is at least the number of endpoints that require an assignment
width = Math.max(endpointPool.size(), width);
// 1.3. Cap the parallelization width by fragment level width limit and system level per query width limit
width = Math.max(1, Math.min(width, pInfo.getMaxWidth()));
checkOrThrow(endpointPool.size() <= width, logger, "Number of mandatory endpoints ({}) that require an assignment is more than the allowed fragment max " + "width ({}).", endpointPool.size(), pInfo.getMaxWidth());
// 1.4 Cap the parallelization width by global max query width
width = Math.max(1, Math.min(width, parameters.getMaxGlobalWidth()));
checkOrThrow(endpointPool.size() <= width, logger, "Number of mandatory endpoints ({}) that require an assignment is more than the allowed global query " + "width ({}).", endpointPool.size(), parameters.getMaxGlobalWidth());
// 1.5 Cap the parallelization width by max allowed parallelization per node
width = Math.max(1, Math.min(width, endpointPool.size() * parameters.getMaxWidthPerNode()));
// 1.6 Cap the parallelization width by total of max allowed width per node. The reason is if the width is more,
// we end up allocating more work units to one or more endpoints that don't have those many work units.
width = Math.min(totalMaxWidth, width);
// Step 2: Select the endpoints
final Map<DrillbitEndpoint, Integer> endpoints = Maps.newHashMap();
// 2.1 First add each endpoint from the pool once so that the mandatory assignment requirement is fulfilled.
for (Entry<DrillbitEndpoint, EndpointAffinity> entry : endpointPool.entrySet()) {
endpoints.put(entry.getKey(), 1);
}
int totalAssigned = endpoints.size();
// 2.2 Assign the remaining slots to endpoints proportional to the affinity of each endpoint
int remainingSlots = width - endpoints.size();
while (remainingSlots > 0) {
for (EndpointAffinity epAf : endpointPool.values()) {
// Each endpoint may take up to ceil(affinity * remainingSlots) more slots this pass,
// bounded by the overall width, the per-node limit and the endpoint's own max width.
final int moreAllocation = (int) Math.ceil(epAf.getAffinity() * remainingSlots);
int currentAssignments = endpoints.get(epAf.getEndpoint());
for (int i = 0; i < moreAllocation && totalAssigned < width && currentAssignments < parameters.getMaxWidthPerNode() && currentAssignments < epAf.getMaxWidth(); i++) {
totalAssigned++;
currentAssignments++;
}
endpoints.put(epAf.getEndpoint(), currentAssignments);
}
final int previousRemainingSlots = remainingSlots;
remainingSlots = width - totalAssigned;
// If a full pass over the pool assigned nothing, every endpoint has hit a limit and
// the remaining slots can never be placed — fail rather than loop forever.
if (previousRemainingSlots == remainingSlots) {
logger.error("Can't parallelize fragment: " + "Every mandatory node has exhausted the maximum width per node limit." + EOL + "Endpoint pool: {}" + EOL + "Assignment so far: {}" + EOL + "Width: {}", endpointPool, endpoints, width);
throw new PhysicalOperatorSetupException("Can not parallelize fragment.");
}
}
// Flatten the per-endpoint counts into a list with one entry per assigned slot.
final List<DrillbitEndpoint> assignedEndpoints = Lists.newArrayList();
for (Entry<DrillbitEndpoint, Integer> entry : endpoints.entrySet()) {
for (int i = 0; i < entry.getValue(); i++) {
assignedEndpoints.add(entry.getKey());
}
}
fragmentWrapper.setWidth(width);
fragmentWrapper.assignEndpoints(assignedEndpoints);
}
Use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache:
the class SystemTableScan, method getOperatorAffinity.
/**
 * If distributed, the scan needs to happen on every node. Since width is enforced, the
 * number of fragments equals the number of Drillbits. Each endpoint is therefore marked
 * as a mandatory assignment (with max width 1) to ensure every Drillbit executes
 * exactly one fragment.
 *
 * @return the Drillbit endpoint affinities; empty when the table is not distributed
 */
@Override
public List<EndpointAffinity> getOperatorAffinity() {
if (!table.isDistributed()) {
return Collections.emptyList();
}
final Collection<DrillbitEndpoint> endpoints = plugin.getContext().getBits();
// Spread affinity evenly; the mandatory flag plus maxWidth = 1 pins one fragment per node.
final double affinityPerNode = 1d / endpoints.size();
final List<EndpointAffinity> affinities = Lists.newArrayList();
for (final DrillbitEndpoint endpoint : endpoints) {
affinities.add(new EndpointAffinity(endpoint, affinityPerNode, true, /* maxWidth = */
1));
}
return affinities;
}
Use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache:
the class TestHardAffinityFragmentParallelizer, method simpleCase2.
@Test
public void simpleCase2() throws Exception {
// Slice target is 1, so parallelization is driven purely by cost.
final Wrapper fragment = newWrapper(200, 1, 20, Collections.singletonList(new EndpointAffinity(N1_EP1, 1.0, true, MAX_VALUE)));
INSTANCE.parallelizeFragment(fragment, newParameters(1, 5, 20), null);
// Expected width is 5:
// 1. cost 200 over slice target 1 asks for width 200;
// 2. max width per node (5) caps it, since only one mandatory endpoint exists.
assertEquals(5, fragment.getWidth());
final List<DrillbitEndpoint> assigned = fragment.getAssignedEndpoints();
assertEquals(5, assigned.size());
// All five fragments must land on the single mandatory endpoint.
for (int i = 0; i < assigned.size(); i++) {
assertEquals(N1_EP1, assigned.get(i));
}
}
Aggregations