use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by apache.
the class Foreman method setupNonRootFragments.
/**
 * Starts every non-root fragment on the drillbit it has been assigned to (local or remote).
 * Messages go out immediately, so data may start flowing back before this method returns.
 *
 * <p>Each endpoint receives one message no matter how many fragments it will run. Intermediate
 * fragments are sent first and their acknowledgements awaited, so that they are ready by the
 * time the leaf fragments start producing data; leaf fragments are then sent without waiting.
 *
 * @param fragments the non-root fragments to start; an empty collection is a no-op
 * @throws ForemanException may be raised at the "send-fragments" injection point
 */
private void setupNonRootFragments(final Collection<PlanFragment> fragments) throws ForemanException {
  if (fragments.isEmpty()) {
    return;
  }

  // Bucket the fragments by target drillbit, keeping leaves and intermediates apart.
  final Multimap<DrillbitEndpoint, PlanFragment> leavesByEndpoint = ArrayListMultimap.create();
  final Multimap<DrillbitEndpoint, PlanFragment> intermediatesByEndpoint = ArrayListMultimap.create();
  for (final PlanFragment fragment : fragments) {
    logger.trace("Tracking intermediate remote node {} with data {}", fragment.getAssignment(), fragment.getFragmentJson());
    // every fragment is tracked for status purposes
    queryManager.addFragmentStatusTracker(fragment, false);
    final Multimap<DrillbitEndpoint, PlanFragment> bucket =
        fragment.getLeafFragment() ? leavesByEndpoint : intermediatesByEndpoint;
    bucket.put(fragment.getAssignment(), fragment);
  }

  /*
   * One latch count per endpoint that receives intermediate fragments. The latch is counted
   * down even when an RPC request fails (see FragmentSubmitFailures) so that a failure cannot
   * hang the wait; failures are collected separately and examined afterwards.
   */
  final Set<DrillbitEndpoint> intermediateEndpoints = intermediatesByEndpoint.keySet();
  final int numIntFragments = intermediateEndpoints.size();
  final ExtendedLatch endpointLatch = new ExtendedLatch(numIntFragments);
  final FragmentSubmitFailures fragmentSubmitFailures = new FragmentSubmitFailures();

  // intermediates go out first
  for (final DrillbitEndpoint target : intermediateEndpoints) {
    sendRemoteFragments(target, intermediatesByEndpoint.get(target), endpointLatch, fragmentSubmitFailures);
  }

  final long timeout = RPC_WAIT_IN_MSECS_PER_FRAGMENT * numIntFragments;
  if (numIntFragments > 0) {
    final boolean allAnswered = endpointLatch.awaitUninterruptibly(timeout);
    if (!allAnswered) {
      final long stillPending = endpointLatch.getCount();
      throw UserException.connectionError()
          .message("Exceeded timeout (%d) while waiting send intermediate work fragments to remote nodes. "
              + "Sent %d and only heard response back from %d nodes.",
              timeout, numIntFragments, numIntFragments - stillPending)
          .build(logger);
    }
  }

  // A single failed intermediate submission fails the whole query.
  final List<FragmentSubmitFailures.SubmissionException> failures = fragmentSubmitFailures.submissionExceptions;
  if (!failures.isEmpty()) {
    final Set<DrillbitEndpoint> seen = Sets.newHashSet();
    final StringBuilder failedNodes = new StringBuilder();
    for (final FragmentSubmitFailures.SubmissionException failure : failures) {
      final DrillbitEndpoint failedEndpoint = failure.drillbitEndpoint;
      if (!seen.add(failedEndpoint)) {
        // this endpoint is already listed
        continue;
      }
      if (seen.size() > 1) {
        failedNodes.append(", ");
      }
      failedNodes.append(failedEndpoint.getAddress());
    }
    throw UserException.connectionError(failures.get(0).rpcException)
        .message("Error setting up remote intermediate fragment execution")
        .addContext("Nodes with failures", failedNodes.toString())
        .build(logger);
  }

  injector.injectChecked(queryContext.getExecutionControls(), "send-fragments", ForemanException.class);

  // Leaf fragments are fire-and-forget: no latch and no failure collector are passed for them.
  for (final DrillbitEndpoint target : leavesByEndpoint.keySet()) {
    sendRemoteFragments(target, leavesByEndpoint.get(target), null, null);
  }
}
use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by apache.
the class Foreman method getQueryWorkUnit.
/**
 * Builds the {@link QueryWorkUnit} for a physical plan: the first of the plan's sorted operators
 * is converted into a fragment tree, which is then handed to a {@link SimpleParallelizer}.
 *
 * <p>When trace logging is enabled, a description of every plan fragment (ids, assigned
 * endpoint address and pretty-printed fragment JSON) is written as a single trace message.
 *
 * @param plan the physical plan to parallelize
 * @return the work unit produced by the parallelizer
 * @throws ExecutionSetupException declared by this method; no throw site is visible here, so it
 *         presumably comes from the operator visitor or the parallelizer
 */
private QueryWorkUnit getQueryWorkUnit(final PhysicalPlan plan) throws ExecutionSetupException {
  final PhysicalOperator rootOperator = plan.getSortedOperators(false).iterator().next();
  final Fragment rootFragment = rootOperator.accept(MakeFragmentsVisitor.INSTANCE, null);
  final SimpleParallelizer parallelizer = new SimpleParallelizer(queryContext);
  final QueryWorkUnit queryWorkUnit = parallelizer.getFragments(
      queryContext.getOptions().getOptionList(),
      queryContext.getCurrentEndpoint(),
      queryId,
      queryContext.getActiveEndpoints(),
      drillbitContext.getPlanReader(),
      rootFragment,
      initiatingClient.getSession(),
      queryContext.getQueryContextInfo());

  if (logger.isTraceEnabled()) {
    final StringBuilder sb = new StringBuilder();
    sb.append("PlanFragments for query ");
    sb.append(queryId);
    sb.append('\n');

    final List<PlanFragment> planFragments = queryWorkUnit.getFragments();
    final int fragmentCount = planFragments.size();
    // One mapper serves every fragment; there is no need to build a new one per iteration.
    final ObjectMapper objectMapper = new ObjectMapper();
    int fragmentIndex = 0;
    for (final PlanFragment planFragment : planFragments) {
      final FragmentHandle fragmentHandle = planFragment.getHandle();
      sb.append("PlanFragment(");
      sb.append(++fragmentIndex);
      sb.append('/');
      sb.append(fragmentCount);
      sb.append(") major_fragment_id ");
      sb.append(fragmentHandle.getMajorFragmentId());
      sb.append(" minor_fragment_id ");
      sb.append(fragmentHandle.getMinorFragmentId());
      sb.append('\n');

      final DrillbitEndpoint endpointAssignment = planFragment.getAssignment();
      sb.append(" DrillbitEndpoint address ");
      sb.append(endpointAssignment.getAddress());
      sb.append('\n');

      String jsonString = "<<malformed JSON>>";
      sb.append(" fragment_json: ");
      try {
        final Object json = objectMapper.readValue(planFragment.getFragmentJson(), Object.class);
        jsonString = objectMapper.defaultPrettyPrintingWriter().writeValueAsString(json);
      } catch (final Exception ignored) {
        // Best effort only: this is diagnostic output, and jsonString already holds a fallback.
      }
      sb.append(jsonString);
      // Terminate the entry so the next fragment's header starts on its own line.
      sb.append('\n');
    }

    // Log once, after all fragments have been described. Logging inside the loop would emit
    // the growing buffer on every iteration and repeat the earlier fragments each time.
    logger.trace(sb.toString());
  }
  return queryWorkUnit;
}
use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.
the class SplittingParallelizer method generateWorkUnits.
/**
 * Split plan into multiple plans based on parallelization.
 * Ideally it is applicable only to plans with two major fragments: Screen and UnionExchange.
 * But there could be cases where we can remove even multiple exchanges, like in the case of "order by".
 * The end goal is to get a single major fragment: Screen with a chain that ends up with a single
 * minor fragment from the leaf exchange. This way each plan can run independently without any
 * exchange involvement.
 *
 * @param options option list handed to every generated {@code MinorFragmentDefn}
 * @param foremanNode not really applicable; only forwarded to {@code generateWorkUnit} when the
 *        plan contains no leaf exchange
 * @param queryId query id stored in each generated fragment handle
 * @param reader not used by this method
 * @param rootNode the root fragment of the plan; its wrapper must have width 1
 * @param planningSet the wrappers of all fragments in the plan
 * @param session user session whose credentials are copied into each plan fragment
 * @param queryContextInfo query context information copied into each plan fragment
 * @return one work unit per minor fragment of the leaf exchange, or a single work unit when
 *         the plan contains no leaf exchange
 * @throws ExecutionSetupException (as {@code ForemanSetupException}) if the root fragment's
 *         width is not 1, or if a non-exchange, non-root fragment is found in the plan
 */
private List<QueryWorkUnit> generateWorkUnits(OptionList options, DrillbitEndpoint foremanNode, QueryId queryId, PhysicalPlanReader reader, Fragment rootNode, PlanningSet planningSet, UserSession session, QueryContextInformation queryContextInfo) throws ExecutionSetupException {
  // now we generate all the individual plan fragments and associated assignments. Note, we need all endpoints
  // assigned before we can materialize, so we start a new loop here rather than utilizing the previous one.
  List<QueryWorkUnit> workUnits = Lists.newArrayList();
  int plansCount = 0;
  DrillbitEndpoint[] endPoints = null;
  long initialAllocation = 0;

  // First pass: pick up width, endpoint assignments and allocation from the leaf exchange fragment.
  for (Wrapper wrapper : planningSet) {
    Fragment node = wrapper.getNode();
    boolean isLeafFragment = node.getReceivingExchangePairs().size() == 0;
    final PhysicalOperator physicalOperatorRoot = node.getRoot();
    if ((physicalOperatorRoot instanceof Exchange) && isLeafFragment) {
      plansCount = wrapper.getWidth();
      // the leaf fragment's initial allocation is shared evenly between the split plans
      initialAllocation = (wrapper.getInitialAllocation() != 0) ? wrapper.getInitialAllocation() / plansCount : 0;
      endPoints = new DrillbitEndpoint[plansCount];
      for (int mfId = 0; mfId < plansCount; mfId++) {
        endPoints[mfId] = wrapper.getAssignedEndpoint(mfId);
      }
    }
  }

  if (plansCount == 0) {
    // no exchange, return list of single QueryWorkUnit
    workUnits.add(generateWorkUnit(options, foremanNode, queryId, rootNode, planningSet, session, queryContextInfo));
    return workUnits;
  }

  // Second pass: materialize one independent plan per minor fragment of the leaf exchange.
  for (Wrapper wrapper : planningSet) {
    Fragment node = wrapper.getNode();
    final PhysicalOperator physicalOperatorRoot = node.getRoot();
    if (physicalOperatorRoot instanceof Exchange) {
      // get to 0 MajorFragment
      continue;
    }
    boolean isRootNode = rootNode == node;
    if (isRootNode && wrapper.getWidth() != 1) {
      throw new ForemanSetupException(String.format("Failure while trying to setup fragment. " + "The root fragment must always have parallelization one. In the current case, the width was set to %d.", wrapper.getWidth()));
    }
    // this fragment is always leaf, as we are removing all the exchanges
    boolean isLeafFragment = true;
    // minor fragment ID is going to be always 0, as plan will be split
    FragmentHandle handle = FragmentHandle.newBuilder()
        .setMajorFragmentId(wrapper.getMajorFragmentId())
        .setMinorFragmentId(0)
        .setQueryId(queryId)
        .build();

    // Create a minorFragment for each major fragment.
    for (int minorFragmentId = 0; minorFragmentId < plansCount; minorFragmentId++) {
      // those fragments should be empty
      List<MinorFragmentDefn> fragments = Lists.newArrayList();
      MinorFragmentDefn rootFragment = null;
      FragmentRoot rootOperator = null;
      IndexedFragmentNode iNode = new IndexedFragmentNode(minorFragmentId, wrapper);
      wrapper.resetAllocation();
      // two visitors here
      // 1. To remove exchange
      // 2. To reset operator IDs as exchanges were removed
      PhysicalOperator op = physicalOperatorRoot.accept(ExchangeRemoverMaterializer.INSTANCE, iNode).accept(OperatorIdVisitor.INSTANCE, 0);
      Preconditions.checkArgument(op instanceof FragmentRoot);
      FragmentRoot root = (FragmentRoot) op;

      // Each split plan names its own assigned endpoint as its foreman.
      PlanFragment fragment = PlanFragment.newBuilder()
          .setForeman(endPoints[minorFragmentId])
          .setHandle(handle)
          .setAssignment(endPoints[minorFragmentId])
          .setLeafFragment(isLeafFragment)
          .setContext(queryContextInfo)
          .setMemInitial(initialAllocation)
          // TODO - for some reason OOM is using leaf fragment max allocation divided by width
          .setMemMax(wrapper.getMaxAllocation())
          .setCredentials(session.getCredentials())
          .addAllCollector(CountRequiredFragments.getCollectors(root))
          .build();

      MinorFragmentDefn fragmentDefn = new MinorFragmentDefn(fragment, root, options);
      if (isRootNode) {
        if (logger.isDebugEnabled()) {
          logger.debug("Root fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
        }
        rootFragment = fragmentDefn;
        rootOperator = root;
      } else {
        if (logger.isDebugEnabled()) {
          logger.debug("Remote fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
        }
        // The %s placeholder is required: String.format silently discards surplus arguments,
        // so without it the offending fragment would be missing from the message.
        throw new ForemanSetupException(String.format("There should not be non-root/remote fragment present in plan split, but there is: %s", DrillStringUtils.unescapeJava(fragment.toString())));
      }
      // fragments should be always empty here
      workUnits.add(new QueryWorkUnit(rootOperator, rootFragment, fragments));
    }
  }
  return workUnits;
}
use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.
the class DrillSeparatePlanningTest method testMultiMinorFragmentComplexQuery.
/**
 * Obtains plan fragments for a group-by/order-by query over {@code dfs.`multilevel/json`} and
 * checks that more than one fragment comes back, that each is flagged as a leaf fragment, and
 * that running them returns 8 rows.
 */
@Test(timeout = 60_000)
public void testMultiMinorFragmentComplexQuery() throws Exception {
  final String sql = "SELECT dir0, sum(o_totalprice) FROM dfs.`multilevel/json` group by dir0 order by dir0";
  final QueryPlanFragments splitPlan = getFragmentsHelper(sql);

  assertNotNull(splitPlan);
  // the query must have been split into several fragments
  assertTrue(1 < splitPlan.getFragmentsCount());

  // every fragment of the split plan is expected to be a leaf
  for (final PlanFragment fragment : splitPlan.getFragmentsList()) {
    assertTrue(fragment.getLeafFragment());
  }

  assertEquals(8, getResultsHelper(splitPlan));
}
use of org.apache.drill.exec.proto.BitControl.PlanFragment in project drill by axbaretto.
the class TestFragmentChecker method print.
/**
 * Parallelizes the physical plan read from {@code fragmentFile} across {@code bitCount}
 * localhost endpoints, prints the root fragment and every other fragment to standard output,
 * and asserts the total number of fragments.
 *
 * @param fragmentFile plan file passed to {@code getRootFragment}
 * @param bitCount number of drillbit endpoints to create (control ports start at 1234)
 * @param expectedFragmentCount expected number of fragments, counting the root fragment
 * @throws Exception declared by this method; no throw site is visible here
 */
private void print(String fragmentFile, int bitCount, int expectedFragmentCount) throws Exception {
  final String banner = String.format("=================Building plan fragments for [%s]. Allowing %d total Drillbits.==================", fragmentFile, bitCount);
  System.out.println(banner);

  final PhysicalPlanReader planReader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(CONFIG);
  final Fragment rootOfPlan = getRootFragment(planReader, fragmentFile);
  final SimpleParallelizer parallelizer = new SimpleParallelizer(1000 * 1000, 5, 10, 1.2);

  // One endpoint per requested drillbit, all on localhost with consecutive control ports.
  final List<DrillbitEndpoint> endpoints = Lists.newArrayList();
  for (int bit = 0; bit < bitCount; bit++) {
    endpoints.add(DrillbitEndpoint.newBuilder().setAddress("localhost").setControlPort(1234 + bit).build());
  }
  // the first endpoint plays the role of the local drillbit (null when none were created)
  final DrillbitEndpoint localBit = endpoints.isEmpty() ? null : endpoints.get(0);

  final QueryContextInformation queryContextInfo = Utilities.createQueryContextInfo("dummySchemaName", "938ea2d9-7cb9-4baf-9414-a5a0b7777e8e");
  final UserBitShared.UserCredentials credentials = UserBitShared.UserCredentials.newBuilder().setUserName("foo").build();
  final UserSession session = UserSession.Builder.newBuilder().withCredentials(credentials).build();

  final QueryWorkUnit workUnit = parallelizer.getFragments(new OptionList(), localBit, QueryId.getDefaultInstance(), endpoints, rootOfPlan, session, queryContextInfo);
  workUnit.applyPlan(planReader);

  final String rootHeader = String.format("=========ROOT FRAGMENT [%d:%d] =========", workUnit.getRootFragment().getHandle().getMajorFragmentId(), workUnit.getRootFragment().getHandle().getMinorFragmentId());
  System.out.println(rootHeader);
  System.out.print(workUnit.getRootFragment().getFragmentJson());

  for (PlanFragment fragment : workUnit.getFragments()) {
    final String fragmentHeader = String.format("=========Fragment [%d:%d]=====", fragment.getHandle().getMajorFragmentId(), fragment.getHandle().getMinorFragmentId());
    System.out.println(fragmentHeader);
    System.out.print(fragment.getFragmentJson());
  }

  // the root fragment is not part of getFragments(), hence the + 1
  assertEquals(expectedFragmentCount, workUnit.getFragments().size() + 1);
}
Aggregations