
Example 11 with EndpointAffinity

use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache.

the class TestHardAffinityFragmentParallelizer method multiNodeCluster2.

@Test
public void multiNodeCluster2() throws Exception {
    final Wrapper wrapper = newWrapper(200, 1, 20,
        ImmutableList.of(
            new EndpointAffinity(N1_EP2, 0.15, true, MAX_VALUE),
            new EndpointAffinity(N2_EP2, 0.15, true, MAX_VALUE),
            new EndpointAffinity(N3_EP1, 0.10, true, MAX_VALUE),
            new EndpointAffinity(N4_EP2, 0.20, true, MAX_VALUE),
            new EndpointAffinity(N1_EP1, 0.20, true, MAX_VALUE)));
    INSTANCE.parallelizeFragment(wrapper, newParameters(1 /* slice target */, 5 /* max width per node */, 20 /* max global width */), null);
    // Expect the fragment parallelization to be 20 because:
    // 1. the cost (200) is above the threshold (SLICE_TARGET_DEFAULT = 1), which gives a width of 200/1 = 200, and
    // 2. the number of mandatory node assignments is 5 (the current width of 200 satisfies that requirement), and
    // 3. the max fragment width of 20 caps the width.
    assertEquals(20, wrapper.getWidth());
    final List<DrillbitEndpoint> assignedEps = wrapper.getAssignedEndpoints();
    assertEquals(20, assignedEps.size());
    final HashMultiset<DrillbitEndpoint> counts = HashMultiset.create();
    for (final DrillbitEndpoint ep : assignedEps) {
        counts.add(ep);
    }
    // Each endpoint gets at most 5 assignments (the max width per node).
    assertTrue(counts.count(N1_EP2) <= 5);
    assertTrue(counts.count(N2_EP2) <= 5);
    assertTrue(counts.count(N3_EP1) <= 5);
    assertTrue(counts.count(N4_EP2) <= 5);
    assertTrue(counts.count(N1_EP1) <= 5);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) EndpointAffinity(org.apache.drill.exec.physical.EndpointAffinity) Test(org.junit.Test) BaseTest(org.apache.drill.test.BaseTest) PlannerTest(org.apache.drill.categories.PlannerTest)
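
The test comments above spell out the width arithmetic. The following standalone sketch is not part of the Drill sources; it simply reproduces that arithmetic in plain Java, using constants that mirror the test inputs (cost 200, slice target 1, five mandatory endpoints, max fragment width 20, max width per node 5).

// A minimal, self-contained sketch of the width calculation described in the test comments.
// All values are the test's inputs; none of this is Drill API.
public class HardAffinityWidthMath {
    public static void main(String[] args) {
        double cost = 200;
        long sliceTarget = 1;
        int mandatoryEndpoints = 5;
        int maxFragmentWidth = 20;
        int maxWidthPerNode = 5;

        int width = (int) Math.ceil(cost / sliceTarget);               // 200
        width = Math.max(mandatoryEndpoints, width);                   // still 200
        width = Math.min(width, maxFragmentWidth);                     // capped to 20
        width = Math.min(width, mandatoryEndpoints * maxWidthPerNode); // 20 <= 25, unchanged

        System.out.println("expected width = " + width);               // prints 20
        System.out.println("max assignments per endpoint = " + maxWidthPerNode);
    }
}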

Example 12 with EndpointAffinity

use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache.

the class HardAffinityFragmentParallelizer method parallelizeFragment.

@Override
public void parallelizeFragment(final Wrapper fragmentWrapper, final ParallelizationParameters parameters, final Collection<DrillbitEndpoint> activeEndpoints) throws PhysicalOperatorSetupException {
    final Stats stats = fragmentWrapper.getStats();
    final ParallelizationInfo pInfo = stats.getParallelizationInfo();
    int totalMaxWidth = 0;
    // Go through the affinity map and extract the endpoints that have mandatory assignment requirement
    final Map<DrillbitEndpoint, EndpointAffinity> endpointPool = Maps.newHashMap();
    for (Entry<DrillbitEndpoint, EndpointAffinity> entry : pInfo.getEndpointAffinityMap().entrySet()) {
        if (entry.getValue().isAssignmentRequired()) {
            endpointPool.put(entry.getKey(), entry.getValue());
            // Limit the max width of the endpoint to allowed max width.
            totalMaxWidth += Math.min(parameters.getMaxWidthPerNode(), entry.getValue().getMaxWidth());
            if (totalMaxWidth < 0) {
                // If totalMaxWidth overflows, clamp it to Integer.MAX_VALUE.
                totalMaxWidth = Integer.MAX_VALUE;
            }
        }
    }
    // Step 1: Find the width taking into account various parameters
    // 1.1. Find the parallelization based on cost. Use max cost of all operators in this fragment; this is consistent
    // with the calculation that ExcessiveExchangeRemover uses.
    int width = (int) Math.ceil(stats.getMaxCost() / parameters.getSliceTarget());
    // 1.2. Make sure the width is at least the number of endpoints that require an assignment
    width = Math.max(endpointPool.size(), width);
    // 1.3. Cap the parallelization width by the fragment level width limit (the global per-query limit is applied in step 1.4)
    width = Math.max(1, Math.min(width, pInfo.getMaxWidth()));
    checkOrThrow(endpointPool.size() <= width, logger,
        "Number of mandatory endpoints ({}) that require an assignment is more than the allowed fragment max " +
            "width ({}).", endpointPool.size(), pInfo.getMaxWidth());
    // 1.4 Cap the parallelization width by global max query width
    width = Math.max(1, Math.min(width, parameters.getMaxGlobalWidth()));
    checkOrThrow(endpointPool.size() <= width, logger,
        "Number of mandatory endpoints ({}) that require an assignment is more than the allowed global query " +
            "width ({}).", endpointPool.size(), parameters.getMaxGlobalWidth());
    // 1.5 Cap the parallelization width by max allowed parallelization per node
    width = Math.max(1, Math.min(width, endpointPool.size() * parameters.getMaxWidthPerNode()));
    // 1.6 Cap the parallelization width by total of max allowed width per node. The reason is if the width is more,
    // we end up allocating more work units to one or more endpoints that don't have that many work units.
    width = Math.min(totalMaxWidth, width);
    // Step 2: Select the endpoints
    final Map<DrillbitEndpoint, Integer> endpoints = Maps.newHashMap();
    // 2.1 First add each endpoint from the pool once so that the mandatory assignment requirement is fulfilled.
    for (Entry<DrillbitEndpoint, EndpointAffinity> entry : endpointPool.entrySet()) {
        endpoints.put(entry.getKey(), 1);
    }
    int totalAssigned = endpoints.size();
    // 2.2 Assign the remaining slots to endpoints proportional to the affinity of each endpoint
    int remainingSlots = width - endpoints.size();
    while (remainingSlots > 0) {
        for (EndpointAffinity epAf : endpointPool.values()) {
            final int moreAllocation = (int) Math.ceil(epAf.getAffinity() * remainingSlots);
            int currentAssignments = endpoints.get(epAf.getEndpoint());
            for (int i = 0;
                 i < moreAllocation && totalAssigned < width
                     && currentAssignments < parameters.getMaxWidthPerNode()
                     && currentAssignments < epAf.getMaxWidth();
                 i++) {
                totalAssigned++;
                currentAssignments++;
            }
            endpoints.put(epAf.getEndpoint(), currentAssignments);
        }
        final int previousRemainingSlots = remainingSlots;
        remainingSlots = width - totalAssigned;
        if (previousRemainingSlots == remainingSlots) {
            logger.error("Can't parallelize fragment: "
                + "Every mandatory node has exhausted the maximum width per node limit." + EOL
                + "Endpoint pool: {}" + EOL
                + "Assignment so far: {}" + EOL
                + "Width: {}", endpointPool, endpoints, width);
            throw new PhysicalOperatorSetupException("Can not parallelize fragment.");
        }
    }
    final List<DrillbitEndpoint> assignedEndpoints = Lists.newArrayList();
    for (Entry<DrillbitEndpoint, Integer> entry : endpoints.entrySet()) {
        for (int i = 0; i < entry.getValue(); i++) {
            assignedEndpoints.add(entry.getKey());
        }
    }
    fragmentWrapper.setWidth(width);
    fragmentWrapper.assignEndpoints(assignedEndpoints);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PhysicalOperatorSetupException(org.apache.drill.exec.physical.PhysicalOperatorSetupException) EndpointAffinity(org.apache.drill.exec.physical.EndpointAffinity)
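
Step 2.2 above hands out the remaining slots in proportion to each endpoint's affinity, bounded by the per-node width limit. The sketch below is not the Drill implementation: plain Strings stand in for DrillbitEndpoint, the affinity values are illustrative, and the per-endpoint max-width cap is omitted for brevity.

import java.util.LinkedHashMap;
import java.util.Map;

// A self-contained simulation of the proportional assignment loop (step 2.2).
public class ProportionalAssignmentSketch {
    public static void main(String[] args) {
        Map<String, Double> affinity = new LinkedHashMap<>();
        affinity.put("node1", 0.40);
        affinity.put("node2", 0.35);
        affinity.put("node3", 0.25);

        int width = 10;
        int maxWidthPerNode = 5;

        // Step 2.1: every mandatory endpoint starts with one slot.
        Map<String, Integer> assignments = new LinkedHashMap<>();
        affinity.keySet().forEach(ep -> assignments.put(ep, 1));
        int totalAssigned = assignments.size();

        // Step 2.2: distribute the rest in proportion to affinity.
        int remaining = width - totalAssigned;
        while (remaining > 0) {
            for (Map.Entry<String, Double> e : affinity.entrySet()) {
                int more = (int) Math.ceil(e.getValue() * remaining);
                int current = assignments.get(e.getKey());
                for (int i = 0; i < more && totalAssigned < width && current < maxWidthPerNode; i++) {
                    totalAssigned++;
                    current++;
                }
                assignments.put(e.getKey(), current);
            }
            int previous = remaining;
            remaining = width - totalAssigned;
            if (previous == remaining) {
                // Mirrors the error path above: no endpoint can take more work.
                throw new IllegalStateException("Cannot parallelize: every node is at its max width per node");
            }
        }
        System.out.println(assignments); // {node1=4, node2=4, node3=2} for these inputs
    }
}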

Example 13 with EndpointAffinity

use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache.

the class SoftAffinityFragmentParallelizer method findEndpoints.

// Assign endpoints based on the given endpoint list, affinity map and width.
private List<DrillbitEndpoint> findEndpoints(final Collection<DrillbitEndpoint> activeEndpoints, final Map<DrillbitEndpoint, EndpointAffinity> endpointAffinityMap, final int width, final ParallelizationParameters parameters) throws PhysicalOperatorSetupException {
    final List<DrillbitEndpoint> endpoints = Lists.newArrayList();
    if (endpointAffinityMap.size() > 0) {
        // Get EndpointAffinity list sorted in descending order of affinity values
        List<EndpointAffinity> sortedAffinityList = ENDPOINT_AFFINITY_ORDERING.immutableSortedCopy(endpointAffinityMap.values());
        // Find the number of mandatory nodes (nodes with +infinity affinity).
        int numRequiredNodes = 0;
        for (EndpointAffinity ep : sortedAffinityList) {
            if (ep.isAssignmentRequired()) {
                numRequiredNodes++;
            } else {
                // The list is sorted in descending order of affinity, so there is no need to look
                // beyond the first non-mandatory node.
                break;
            }
        }
        if (width < numRequiredNodes) {
            throw new PhysicalOperatorSetupException("Can not parallelize the fragment as the parallelization width ("
                + width + ") is less than the number of mandatory nodes ("
                + numRequiredNodes + " nodes with +INFINITE affinity).");
        }
        // Find the maximum number of slots which should go to endpoints with affinity (See DRILL-825 for details)
        int affinedSlots = Math.max(1,
            (int) (Math.ceil(parameters.getAffinityFactor() * width / activeEndpoints.size())
                * sortedAffinityList.size()));
        // Make sure affined slots is at least the number of mandatory nodes
        affinedSlots = Math.max(affinedSlots, numRequiredNodes);
        // Cap the affined slots to max parallelization width
        affinedSlots = Math.min(affinedSlots, width);
        Iterator<EndpointAffinity> affinedEPItr = Iterators.cycle(sortedAffinityList);
        // Keep adding until we have selected "affinedSlots" number of endpoints.
        while (endpoints.size() < affinedSlots) {
            EndpointAffinity ea = affinedEPItr.next();
            endpoints.add(ea.getEndpoint());
        }
    }
    // add remaining endpoints if required
    if (endpoints.size() < width) {
        // Get a list of endpoints that are not part of the affinity endpoint list
        List<DrillbitEndpoint> endpointsWithNoAffinity;
        final Set<DrillbitEndpoint> endpointsWithAffinity = endpointAffinityMap.keySet();
        if (endpointAffinityMap.size() > 0) {
            endpointsWithNoAffinity = Lists.newArrayList();
            for (DrillbitEndpoint ep : activeEndpoints) {
                if (!endpointsWithAffinity.contains(ep)) {
                    endpointsWithNoAffinity.add(ep);
                }
            }
        } else {
            // Need to create a mutable copy instead of an immutable one, because the list is
            // shuffled below and Collections.shuffle() does not accept an immutable list as input.
            endpointsWithNoAffinity = Lists.newArrayList(activeEndpoints);
        }
        // round robin with random start.
        Collections.shuffle(endpointsWithNoAffinity, ThreadLocalRandom.current());
        Iterator<DrillbitEndpoint> otherEPItr = Iterators.cycle(endpointsWithNoAffinity.size() > 0 ? endpointsWithNoAffinity : endpointsWithAffinity);
        while (endpoints.size() < width) {
            endpoints.add(otherEPItr.next());
        }
    }
    return endpoints;
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PhysicalOperatorSetupException(org.apache.drill.exec.physical.PhysicalOperatorSetupException) EndpointAffinity(org.apache.drill.exec.physical.EndpointAffinity)
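
The affinedSlots formula above (see DRILL-825) decides how many of the fragment's slots are reserved for endpoints that carry affinity. The worked example below uses hypothetical numbers, not values from the Drill sources, to show the calculation.

// A minimal worked example of the affinedSlots calculation with illustrative inputs:
// affinityFactor = 1.2, width = 10, 20 active endpoints, 4 endpoints with affinity.
public class AffinedSlotsMath {
    public static void main(String[] args) {
        double affinityFactor = 1.2;
        int width = 10;
        int activeEndpoints = 20;
        int endpointsWithAffinity = 4;
        int numRequiredNodes = 0; // no +INFINITY affinities in this example

        int affinedSlots = Math.max(1,
            (int) (Math.ceil(affinityFactor * width / activeEndpoints) * endpointsWithAffinity));
        affinedSlots = Math.max(affinedSlots, numRequiredNodes); // at least the mandatory nodes
        affinedSlots = Math.min(affinedSlots, width);            // never more than the total width

        // 1.2 * 10 / 20 = 0.6, ceil -> 1.0, times 4 endpoints -> 4 of the 10 slots are affined.
        System.out.println("affinedSlots = " + affinedSlots);    // prints 4
    }
}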

Example 14 with EndpointAffinity

use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache.

the class AbstractDeMuxExchange method getSenderParallelizationInfo.

@Override
public ParallelizationInfo getSenderParallelizationInfo(List<DrillbitEndpoint> receiverFragmentEndpoints) {
    Preconditions.checkArgument(receiverFragmentEndpoints != null && receiverFragmentEndpoints.size() > 0, "Receiver fragment endpoint list should not be empty");
    // We want to run one demux sender per Drillbit endpoint.
    // Identify the number of unique Drillbit endpoints in receiver fragment endpoints.
    List<DrillbitEndpoint> drillbitEndpoints = ImmutableSet.copyOf(receiverFragmentEndpoints).asList();
    List<EndpointAffinity> affinities = Lists.newArrayList();
    for (DrillbitEndpoint ep : drillbitEndpoints) {
        affinities.add(new EndpointAffinity(ep, Double.POSITIVE_INFINITY));
    }
    return ParallelizationInfo.create(affinities.size(), affinities.size(), affinities);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) EndpointAffinity(org.apache.drill.exec.physical.EndpointAffinity)
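
The method above creates one demux sender per unique Drillbit endpoint by collapsing duplicate receiver endpoints while preserving their order, then pinning each survivor with a +INFINITY (mandatory) affinity. The standalone sketch below illustrates the counting logic only; plain Strings stand in for DrillbitEndpoint and LinkedHashSet stands in for Guava's ImmutableSet.copyOf.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

// Shows why the sender count equals the number of unique receiver endpoints.
public class DeMuxSenderCountSketch {
    public static void main(String[] args) {
        // Receiver fragments running on three Drillbits, some hosting more than one fragment.
        List<String> receiverEndpoints =
            Arrays.asList("drillbit-a", "drillbit-b", "drillbit-a", "drillbit-c", "drillbit-b");

        // Duplicates collapse, insertion order is preserved.
        LinkedHashSet<String> uniqueEndpoints = new LinkedHashSet<>(receiverEndpoints);

        System.out.println("sender count = " + uniqueEndpoints.size()); // prints 3
        uniqueEndpoints.forEach(ep ->
            System.out.println(ep + " -> affinity = +INFINITY (assignment required)"));
    }
}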

Example 15 with EndpointAffinity

use of org.apache.drill.exec.physical.EndpointAffinity in project drill by apache.

the class EasyGroupScan method createMappings.

private void createMappings(List<EndpointAffinity> affinities) {
    List<DrillbitEndpoint> endpoints = Lists.newArrayList();
    for (EndpointAffinity e : affinities) {
        endpoints.add(e.getEndpoint());
    }
    applyAssignments(endpoints);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) EndpointAffinity(org.apache.drill.exec.physical.EndpointAffinity)
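
The loop above only projects the endpoint out of each EndpointAffinity before delegating to applyAssignments. The standalone sketch below shows the same projection written with Java streams; it is an illustration only, with plain Strings standing in for DrillbitEndpoint and a tiny holder class standing in for EndpointAffinity.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

// Stream-based equivalent of the endpoint extraction in createMappings.
public class CreateMappingsSketch {
    static class Affinity {
        final String endpoint;
        Affinity(String endpoint) { this.endpoint = endpoint; }
    }

    public static void main(String[] args) {
        List<Affinity> affinities =
            Arrays.asList(new Affinity("drillbit-a"), new Affinity("drillbit-b"));

        // Collect just the endpoints, preserving order, as the for-loop above does.
        List<String> endpoints = affinities.stream()
            .map(a -> a.endpoint)
            .collect(Collectors.toList());

        System.out.println(endpoints); // [drillbit-a, drillbit-b]
    }
}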

Aggregations

EndpointAffinity (org.apache.drill.exec.physical.EndpointAffinity)28 DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)28 HashMap (java.util.HashMap)6 ArrayList (java.util.ArrayList)4 PlannerTest (org.apache.drill.categories.PlannerTest)4 PhysicalOperatorSetupException (org.apache.drill.exec.physical.PhysicalOperatorSetupException)4 Test (org.junit.Test)4 ObjectFloatHashMap (com.carrotsearch.hppc.ObjectFloatHashMap)2 ServerAddress (com.mongodb.ServerAddress)2 IOException (java.io.IOException)2 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)2 LogicalInputSplit (org.apache.drill.exec.store.hive.HiveMetadataProvider.LogicalInputSplit)2 BaseTest (org.apache.drill.test.BaseTest)2 ServerName (org.apache.hadoop.hbase.ServerName)2 Stopwatch (com.google.common.base.Stopwatch)1 LinkedList (java.util.LinkedList)1 Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)1