Search in sources :

Example 1 with PlanFragment

use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by apache.

the class Foreman method setupNonRootFragments.

/**
 * Dispatches all non-root fragments to their assigned drillbits.
 *
 * <p>Fragments are grouped per target endpoint so that each endpoint receives one call to
 * {@code sendRemoteFragments} per group. Intermediate fragments go out first and are awaited
 * (bounded by a timeout) so they are in place before the leaf fragments, which are sent last
 * without waiting, start producing data.
 *
 * @param fragments the non-root fragments to dispatch; an empty collection is a no-op
 * @throws ForemanException declared for the "send-fragments" injection site
 */
private void setupNonRootFragments(final Collection<PlanFragment> fragments) throws ForemanException {
    if (fragments.isEmpty()) {
        // no remote work at all
        return;
    }

    // Bucket the fragments by kind (leaf vs. intermediate) and by the drillbit they are assigned to,
    // registering a status tracker for every one of them along the way.
    final Multimap<DrillbitEndpoint, PlanFragment> leavesByEndpoint = ArrayListMultimap.create();
    final Multimap<DrillbitEndpoint, PlanFragment> intermediatesByEndpoint = ArrayListMultimap.create();
    for (final PlanFragment fragment : fragments) {
        final DrillbitEndpoint target = fragment.getAssignment();
        logger.trace("Tracking intermediate remote node {} with data {}", target, fragment.getFragmentJson());
        queryManager.addFragmentStatusTracker(fragment, false);
        final Multimap<DrillbitEndpoint, PlanFragment> bucket =
            fragment.getLeafFragment() ? leavesByEndpoint : intermediatesByEndpoint;
        bucket.put(target, fragment);
    }

    // One latch count per endpoint that receives intermediate fragments. The failure collector is
    // handed to every send so that failed submissions can be reported after the wait below.
    final int numIntFragments = intermediatesByEndpoint.keySet().size();
    final ExtendedLatch endpointLatch = new ExtendedLatch(numIntFragments);
    final FragmentSubmitFailures fragmentSubmitFailures = new FragmentSubmitFailures();

    // intermediates first
    for (final DrillbitEndpoint target : intermediatesByEndpoint.keySet()) {
        sendRemoteFragments(target, intermediatesByEndpoint.get(target), endpointLatch, fragmentSubmitFailures);
    }

    final long timeout = RPC_WAIT_IN_MSECS_PER_FRAGMENT * numIntFragments;
    if (numIntFragments > 0 && !endpointLatch.awaitUninterruptibly(timeout)) {
        final long stillPending = endpointLatch.getCount();
        throw UserException.connectionError()
            .message("Exceeded timeout (%d) while waiting send intermediate work fragments to remote nodes. " + "Sent %d and only heard response back from %d nodes.", timeout, numIntFragments, numIntFragments - stillPending)
            .build(logger);
    }

    // Any failed intermediate submission fails the whole query; report each failing node once.
    final List<FragmentSubmitFailures.SubmissionException> failures = fragmentSubmitFailures.submissionExceptions;
    if (!failures.isEmpty()) {
        final Set<DrillbitEndpoint> failedEndpoints = Sets.newHashSet();
        final StringBuilder failedAddresses = new StringBuilder();
        for (final FragmentSubmitFailures.SubmissionException failure : failures) {
            final DrillbitEndpoint failedEndpoint = failure.drillbitEndpoint;
            if (!failedEndpoints.add(failedEndpoint)) {
                // this node has already been listed
                continue;
            }
            if (failedEndpoints.size() > 1) {
                failedAddresses.append(", ");
            }
            failedAddresses.append(failedEndpoint.getAddress());
        }
        throw UserException.connectionError(failures.get(0).rpcException)
            .message("Error setting up remote intermediate fragment execution")
            .addContext("Nodes with failures", failedAddresses.toString())
            .build(logger);
    }

    injector.injectChecked(queryContext.getExecutionControls(), "send-fragments", ForemanException.class);

    // Leaves last, fire-and-forget: no latch and no failure collector are passed for these.
    for (final DrillbitEndpoint target : leavesByEndpoint.keySet()) {
        sendRemoteFragments(target, leavesByEndpoint.get(target), null, null);
    }
}
Also used : PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) ExtendedLatch(org.apache.drill.common.concurrent.ExtendedLatch) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)

Example 2 with PlanFragment

use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by apache.

the class Foreman method getQueryWorkUnit.

/**
 * Parallelizes the given physical plan into a {@link QueryWorkUnit} and, when trace logging is
 * enabled, logs a description of every plan fragment it contains.
 *
 * @param plan the physical plan; its first sorted operator is used as the root operator
 * @return the work unit produced by the parallelizer
 * @throws ExecutionSetupException declared by this method; presumably raised by fragment
 *         construction or parallelization (not visible from this block)
 */
private QueryWorkUnit getQueryWorkUnit(final PhysicalPlan plan) throws ExecutionSetupException {
    final PhysicalOperator rootOperator = plan.getSortedOperators(false).iterator().next();
    final Fragment rootFragment = rootOperator.accept(MakeFragmentsVisitor.INSTANCE, null);
    final SimpleParallelizer parallelizer = new SimpleParallelizer(queryContext);
    final QueryWorkUnit queryWorkUnit = parallelizer.getFragments(queryContext.getOptions().getOptionList(), queryContext.getCurrentEndpoint(), queryId, queryContext.getActiveEndpoints(), drillbitContext.getPlanReader(), rootFragment, initiatingClient.getSession(), queryContext.getQueryContextInfo());
    if (logger.isTraceEnabled()) {
        final StringBuilder sb = new StringBuilder();
        sb.append("PlanFragments for query ");
        sb.append(queryId);
        sb.append('\n');
        final List<PlanFragment> planFragments = queryWorkUnit.getFragments();
        final int fragmentCount = planFragments.size();
        // One mapper is enough for all fragments; no need to build a new one per iteration.
        final ObjectMapper objectMapper = new ObjectMapper();
        int fragmentIndex = 0;
        for (final PlanFragment planFragment : planFragments) {
            final FragmentHandle fragmentHandle = planFragment.getHandle();
            sb.append("PlanFragment(");
            sb.append(++fragmentIndex);
            sb.append('/');
            sb.append(fragmentCount);
            sb.append(") major_fragment_id ");
            sb.append(fragmentHandle.getMajorFragmentId());
            sb.append(" minor_fragment_id ");
            sb.append(fragmentHandle.getMinorFragmentId());
            sb.append('\n');
            final DrillbitEndpoint endpointAssignment = planFragment.getAssignment();
            sb.append("  DrillbitEndpoint address ");
            sb.append(endpointAssignment.getAddress());
            sb.append('\n');
            String jsonString = "<<malformed JSON>>";
            sb.append("  fragment_json: ");
            try {
                final Object json = objectMapper.readValue(planFragment.getFragmentJson(), Object.class);
                jsonString = objectMapper.defaultPrettyPrintingWriter().writeValueAsString(json);
            } catch (final Exception ignored) {
                // Best effort only: jsonString already holds the fallback text, and a fragment whose
                // JSON cannot be pretty-printed must not break query setup.
            }
            sb.append(jsonString);
            // Keep consecutive fragment entries on separate lines.
            sb.append('\n');
        }
        // Emit the accumulated description exactly once. Logging inside the loop would repeat the
        // header and every earlier fragment on each iteration because the builder is never reset.
        logger.trace(sb.toString());
    }
    return queryWorkUnit;
}
Also used : QueryWorkUnit(org.apache.drill.exec.work.QueryWorkUnit) SimpleParallelizer(org.apache.drill.exec.planner.fragment.SimpleParallelizer) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) Fragment(org.apache.drill.exec.planner.fragment.Fragment) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) UserException(org.apache.drill.common.exceptions.UserException) RpcException(org.apache.drill.exec.rpc.RpcException) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) OptimizerException(org.apache.drill.exec.exception.OptimizerException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) IOException(java.io.IOException) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 3 with PlanFragment

use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.

the class SplittingParallelizer method generateWorkUnits.

/**
 * Split plan into multiple plans based on parallelization.
 * Ideally it is applicable only to plans with two major fragments: Screen and UnionExchange.
 * But there could be cases where we can remove even multiple exchanges, like in case of "order by".
 * End goal is to get a single major fragment: Screen with a chain that ends up with a single minor
 * fragment from the leaf exchange. This way each plan can run independently without any exchange
 * involvement.
 *
 * @param options query options, attached to every generated minor fragment definition
 * @param foremanNode only forwarded to {@code generateWorkUnit} when the plan has no leaf exchange;
 *        in split plans each fragment's foreman is its own assigned endpoint
 * @param queryId id of the query, stored in every fragment handle
 * @param reader physical plan reader; not referenced by this method
 * @param rootNode root fragment of the plan
 * @param planningSet wrappers of all major fragments in the plan
 * @param session user session supplying the credentials stored in each fragment
 * @param queryContextInfo query context information stored in each fragment
 * @return a single work unit when no leaf exchange is found, otherwise one work unit per minor
 *         fragment of the leaf exchange
 * @throws ExecutionSetupException a {@link ForemanSetupException} is raised if the root fragment's
 *         width is not one, or if a non-root fragment remains after skipping exchange-rooted ones
 */
private List<QueryWorkUnit> generateWorkUnits(OptionList options, DrillbitEndpoint foremanNode, QueryId queryId, PhysicalPlanReader reader, Fragment rootNode, PlanningSet planningSet, UserSession session, QueryContextInformation queryContextInfo) throws ExecutionSetupException {
    // now we generate all the individual plan fragments and associated assignments. Note, we need all endpoints
    // assigned before we can materialize, so we start a new loop here rather than utilizing the previous one.
    List<QueryWorkUnit> workUnits = Lists.newArrayList();
    int plansCount = 0;
    DrillbitEndpoint[] endPoints = null;
    long initialAllocation = 0;

    // First pass: collect width, per-plan allocation and assigned endpoints from the leaf exchange fragment.
    for (Wrapper wrapper : planningSet) {
        Fragment node = wrapper.getNode();
        boolean isLeafFragment = node.getReceivingExchangePairs().size() == 0;
        final PhysicalOperator physicalOperatorRoot = node.getRoot();
        if ((physicalOperatorRoot instanceof Exchange) && isLeafFragment) {
            plansCount = wrapper.getWidth();
            // the leaf fragment's initial allocation is shared evenly between the split plans
            initialAllocation = (wrapper.getInitialAllocation() != 0) ? wrapper.getInitialAllocation() / plansCount : 0;
            endPoints = new DrillbitEndpoint[plansCount];
            for (int mfId = 0; mfId < plansCount; mfId++) {
                endPoints[mfId] = wrapper.getAssignedEndpoint(mfId);
            }
        }
    }

    if (plansCount == 0) {
        // no exchange, return list of single QueryWorkUnit
        workUnits.add(generateWorkUnit(options, foremanNode, queryId, rootNode, planningSet, session, queryContextInfo));
        return workUnits;
    }

    // Second pass: materialize one stand-alone plan per minor fragment of the leaf exchange.
    for (Wrapper wrapper : planningSet) {
        Fragment node = wrapper.getNode();
        final PhysicalOperator physicalOperatorRoot = node.getRoot();
        if (physicalOperatorRoot instanceof Exchange) {
            // get to 0 MajorFragment
            continue;
        }
        boolean isRootNode = rootNode == node;
        if (isRootNode && wrapper.getWidth() != 1) {
            throw new ForemanSetupException(String.format("Failure while trying to setup fragment. " + "The root fragment must always have parallelization one. In the current case, the width was set to %d.", wrapper.getWidth()));
        }
        // this fragment is always leaf, as we are removing all the exchanges
        boolean isLeafFragment = true;
        // minor fragment ID is going to be always 0, as plan will be split
        FragmentHandle handle = FragmentHandle.newBuilder()
            .setMajorFragmentId(wrapper.getMajorFragmentId())
            .setMinorFragmentId(0)
            .setQueryId(queryId)
            .build();
        // Create a minorFragment for each major fragment.
        for (int minorFragmentId = 0; minorFragmentId < plansCount; minorFragmentId++) {
            // those fragments should be empty
            List<MinorFragmentDefn> fragments = Lists.newArrayList();
            IndexedFragmentNode iNode = new IndexedFragmentNode(minorFragmentId, wrapper);
            wrapper.resetAllocation();
            // two visitors here
            // 1. To remove exchange
            // 2. To reset operator IDs as exchanges were removed
            PhysicalOperator op = physicalOperatorRoot.accept(ExchangeRemoverMaterializer.INSTANCE, iNode).accept(OperatorIdVisitor.INSTANCE, 0);
            Preconditions.checkArgument(op instanceof FragmentRoot);
            FragmentRoot root = (FragmentRoot) op;
            PlanFragment fragment = PlanFragment.newBuilder()
                .setForeman(endPoints[minorFragmentId])
                .setHandle(handle)
                .setAssignment(endPoints[minorFragmentId])
                .setLeafFragment(isLeafFragment)
                .setContext(queryContextInfo)
                .setMemInitial(initialAllocation)
                // TODO - for some reason OOM is using leaf fragment max allocation divided by width
                .setMemMax(wrapper.getMaxAllocation())
                .setCredentials(session.getCredentials())
                .addAllCollector(CountRequiredFragments.getCollectors(root))
                .build();
            MinorFragmentDefn fragmentDefn = new MinorFragmentDefn(fragment, root, options);
            if (!isRootNode) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Remote fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
                }
                // The %s specifier is required: without it String.format silently drops the fragment text.
                throw new ForemanSetupException(String.format("There should not be non-root/remote fragment present in plan split, but there is: %s", DrillStringUtils.unescapeJava(fragment.toString())));
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Root fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
            }
            // fragments should be always empty here
            workUnits.add(new QueryWorkUnit(root, fragmentDefn, fragments));
        }
    }
    return workUnits;
}
Also used : Wrapper(org.apache.drill.exec.planner.fragment.Wrapper) MinorFragmentDefn(org.apache.drill.exec.work.QueryWorkUnit.MinorFragmentDefn) QueryWorkUnit(org.apache.drill.exec.work.QueryWorkUnit) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) Fragment(org.apache.drill.exec.planner.fragment.Fragment) IndexedFragmentNode(org.apache.drill.exec.planner.fragment.Materializer.IndexedFragmentNode) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) Exchange(org.apache.drill.exec.physical.base.Exchange) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException)

Example 4 with PlanFragment

use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.

the class DrillSeparatePlanningTest method testMultiMinorFragmentComplexQuery.

/**
 * Plans a group-by / order-by aggregation through {@code getFragmentsHelper} and checks that the
 * result holds more than one fragment, that every fragment is flagged as a leaf, and that
 * {@code getResultsHelper} reports eight rows for those fragments.
 */
@Test(timeout = 60_000)
public void testMultiMinorFragmentComplexQuery() throws Exception {
    final String query = "SELECT dir0, sum(o_totalprice) FROM dfs.`multilevel/json` group by dir0 order by dir0";

    final QueryPlanFragments splitPlan = getFragmentsHelper(query);
    assertNotNull(splitPlan);

    // the plan must have been split into several fragments
    final int fragmentCount = splitPlan.getFragmentsCount();
    assertTrue(fragmentCount > 1);

    // every fragment is expected to be marked as a leaf
    for (final PlanFragment fragment : splitPlan.getFragmentsList()) {
        final boolean isLeaf = fragment.getLeafFragment();
        assertTrue(isLeaf);
    }

    final int rowCount = getResultsHelper(splitPlan);
    assertEquals(8, rowCount);
}
Also used : QueryPlanFragments(org.apache.drill.exec.proto.UserProtos.QueryPlanFragments) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) ClusterTest(org.apache.drill.test.ClusterTest) Test(org.junit.Test) PlannerTest(org.apache.drill.categories.PlannerTest) SlowTest(org.apache.drill.categories.SlowTest)

Example 5 with PlanFragment

use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.

the class TestFragmentChecker method print.

/**
 * Parallelizes the plan in {@code fragmentFile} across {@code bitCount} locally defined
 * drillbit endpoints, prints the root fragment and every other fragment to standard output, and
 * asserts the total number of fragments.
 *
 * @param fragmentFile plan file handed to {@code getRootFragment}
 * @param bitCount number of drillbit endpoints to make available to the parallelizer
 * @param expectedFragmentCount expected number of fragments, counting the root fragment
 * @throws Exception if reading, parallelizing or applying the plan fails
 */
private void print(String fragmentFile, int bitCount, int expectedFragmentCount) throws Exception {
    final String banner = String.format("=================Building plan fragments for [%s].  Allowing %d total Drillbits.==================", fragmentFile, bitCount);
    System.out.println(banner);

    final PhysicalPlanReader planReader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
    final Fragment rootOfPlan = getRootFragment(planReader, fragmentFile);
    final SimpleParallelizer parallelizer = new SimpleParallelizer(1000 * 1000, 5, 10, 1.2);

    // All endpoints live on localhost and differ only by control port; the first one acts as the local bit.
    final List<DrillbitEndpoint> drillbits = Lists.newArrayList();
    for (int i = 0; i < bitCount; i++) {
        drillbits.add(DrillbitEndpoint.newBuilder().setAddress("localhost").setControlPort(1234 + i).build());
    }
    final DrillbitEndpoint localBit = drillbits.isEmpty() ? null : drillbits.get(0);

    final QueryContextInformation queryContextInfo = Utilities.createQueryContextInfo("dummySchemaName", "938ea2d9-7cb9-4baf-9414-a5a0b7777e8e");
    final UserSession session = UserSession.Builder.newBuilder()
        .withCredentials(UserBitShared.UserCredentials.newBuilder().setUserName("foo").build())
        .build();

    final QueryWorkUnit workUnit = parallelizer.getFragments(new OptionList(), localBit, QueryId.getDefaultInstance(), drillbits, rootOfPlan, session, queryContextInfo);
    workUnit.applyPlan(planReader);

    // root fragment first
    final String rootHeader = String.format("=========ROOT FRAGMENT [%d:%d] =========", workUnit.getRootFragment().getHandle().getMajorFragmentId(), workUnit.getRootFragment().getHandle().getMinorFragmentId());
    System.out.println(rootHeader);
    System.out.print(workUnit.getRootFragment().getFragmentJson());

    // then every remaining fragment
    for (final PlanFragment fragment : workUnit.getFragments()) {
        final String fragmentHeader = String.format("=========Fragment [%d:%d]=====", fragment.getHandle().getMajorFragmentId(), fragment.getHandle().getMinorFragmentId());
        System.out.println(fragmentHeader);
        System.out.print(fragment.getFragmentJson());
    }

    // the root fragment is not part of getFragments(), hence the + 1
    assertEquals(expectedFragmentCount, workUnit.getFragments().size() + 1);
}
Also used : DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) PhysicalPlanReader(org.apache.drill.exec.planner.PhysicalPlanReader) QueryWorkUnit(org.apache.drill.exec.work.QueryWorkUnit) SimpleParallelizer(org.apache.drill.exec.planner.fragment.SimpleParallelizer) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) Fragment(org.apache.drill.exec.planner.fragment.Fragment) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) QueryContextInformation(org.apache.drill.exec.proto.BitControl.QueryContextInformation) OptionList(org.apache.drill.exec.server.options.OptionList) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment)

Aggregations

PlanFragment (org.apache.drill.exec.proto.BitControl.PlanFragment)35 DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)18 QueryWorkUnit (org.apache.drill.exec.work.QueryWorkUnit)14 PhysicalOperator (org.apache.drill.exec.physical.base.PhysicalOperator)8 Fragment (org.apache.drill.exec.planner.fragment.Fragment)8 FragmentHandle (org.apache.drill.exec.proto.ExecProtos.FragmentHandle)8 IOException (java.io.IOException)7 FragmentRoot (org.apache.drill.exec.physical.base.FragmentRoot)7 QueryPlanFragments (org.apache.drill.exec.proto.UserProtos.QueryPlanFragments)7 ForemanSetupException (org.apache.drill.exec.work.foreman.ForemanSetupException)7 PlannerTest (org.apache.drill.categories.PlannerTest)6 SlowTest (org.apache.drill.categories.SlowTest)6 ClusterTest (org.apache.drill.test.ClusterTest)6 Test (org.junit.Test)6 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)5 MinorFragmentEndpoint (org.apache.drill.exec.physical.MinorFragmentEndpoint)5 IndexedFragmentNode (org.apache.drill.exec.planner.fragment.Materializer.IndexedFragmentNode)5 QueryContextInformation (org.apache.drill.exec.proto.BitControl.QueryContextInformation)5 OptionList (org.apache.drill.exec.server.options.OptionList)5 SimpleParallelizer (org.apache.drill.exec.planner.fragment.SimpleParallelizer)4