Search in sources :

Example 1 with SubmitWorkResponseProto

use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.

the class LlapTaskCommunicator method registerRunningTaskAttempt.

/**
 * Registers a task attempt with the parent communicator and the local trackers, then submits
 * it to the LLAP daemon identified by the container's host and port.
 *
 * <p>If the task belongs to a different DAG than the one currently tracked, the current DAG
 * state is reset first using the Hive query id extracted from the task spec. The outcome of
 * the asynchronous submission is reported back to the context via the callback: a REJECTED
 * submission kills the attempt with EXECUTOR_BUSY, an {@link IOException} kills it with
 * COMMUNICATION_ERROR, and any other error fails it.
 *
 * @throws RuntimeException if the query id cannot be extracted, no ContainerInfo is known for
 *         {@code containerId}, the fragment runtime info cannot be obtained, or the submit
 *         request cannot be constructed
 */
@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec, Map<String, LocalResource> additionalResources, Credentials credentials, boolean credentialsChanged, int priority) {
    super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials, credentialsChanged, priority);
    int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
    // A task from a DAG other than the tracked one means a new DAG has started.
    if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
        // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
        // once TEZ-2672 is fixed.
        String hiveQueryId;
        try {
            hiveQueryId = extractQueryId(taskSpec);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
        }
        Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
        resetCurrentDag(dagId, hiveQueryId);
    }
    ContainerInfo containerInfo = getContainerInfo(containerId);
    String host;
    int port;
    if (containerInfo != null) {
        // Read host and port together under the containerInfo monitor.
        synchronized (containerInfo) {
            host = containerInfo.host;
            port = containerInfo.port;
        }
    } else {
        // TODO Handle this properly
        throw new RuntimeException("ContainerInfo not found for container: " + containerId + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
    }
    LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
    registerKnownNode(nodeId);
    entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
    nodesForQuery.add(nodeId);
    sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
    FragmentRuntimeInfo fragmentRuntimeInfo;
    try {
        fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(), taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
    } catch (Exception e) {
        LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
        // Log with context, then propagate: runtime exceptions as-is, anything else wrapped.
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        } else {
            throw new RuntimeException(e);
        }
    }
    SubmitWorkRequestProto requestProto;
    try {
        requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
    } catch (IOException e) {
        throw new RuntimeException("Failed to construct request", e);
    }
    // Have to register this up front right now. Otherwise, it's possible for the task to start
    // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
    getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
    communicator.sendSubmitWork(requestProto, host, port, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
                if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Service Busy");
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
                    return;
                }
            } else {
                // This should never happen as server always returns a valid status on success
                throw new RuntimeException("SubmissionState in response is expected!");
            }
            if (response.hasUniqueNodeId()) {
                entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
            }
            LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
            Throwable originalError = t;
            // Unwrap a ServiceException only when it actually carries a cause. A cause-less
            // ServiceException is classified as-is, so 't' cannot become null here and trigger
            // a NullPointerException in the failure branch below.
            if (t instanceof ServiceException && t.getCause() != null) {
                t = t.getCause();
            }
            if (t instanceof RemoteException) {
                // All others from the remote service cause the task to FAIL.
                LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                processSendError(originalError);
                getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.toString());
            } else {
                // Exception from the RPC layer - communication failure, consider as KILLED / service down.
                if (t instanceof IOException) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Communication Error");
                    processSendError(originalError);
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
                } else {
                    // Anything else is a FAIL.
                    LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                    processSendError(originalError);
                    getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.getMessage());
                }
            }
        }
    });
}
Also used : FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ServiceException(com.google.protobuf.ServiceException) RetriableException(org.apache.hadoop.ipc.RetriableException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) TezException(org.apache.tez.dag.api.TezException) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ServiceException(com.google.protobuf.ServiceException) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 2 with SubmitWorkResponseProto

use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.

the class ContainerRunnerImpl method submitWork.

/**
 * Accepts a fragment submission: resolves the vertex spec and initial event from the request,
 * registers the fragment with the query tracker, wraps it in a TaskRunnerCallable and hands it
 * to the executor service for scheduling.
 *
 * @param request the work submission request
 * @return a response carrying the submission state; the unique node id is only set when the
 *         submission was not REJECTED
 * @throws IOException declared by the signature; a SecurityException raised while obtaining
 *         the token info is logged and rethrown unchanged
 */
@Override
public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException {
    LlapTokenInfo tokenInfo = null;
    try {
        tokenInfo = LlapTokenChecker.getTokenInfo(clusterId);
    } catch (SecurityException ex) {
        logSecurityErrorRarely(null);
        throw ex;
    }
    SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo);
    TezEvent initialEvent = extractInitialEvent(request, tokenInfo);
    TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
    String fragmentIdString = attemptId.toString();
    if (LOG.isInfoEnabled()) {
        LOG.info("Queueing container for execution: fragemendId={}, {}", fragmentIdString, stringifySubmitRequest(request, vertex));
    }
    QueryIdentifierProto qIdProto = vertex.getQueryIdentifier();
    HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(), request.getContainerIdString(), localAddress.get().getHostName(), constructUniqueQueryId(vertex.getHiveQueryId(), qIdProto.getDagIndex()), qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber());
    // This is the start of container-annotated logging.
    final String dagId = attemptId.getTaskID().getVertexID().getDAGId().toString();
    final String queryId = vertex.getHiveQueryId();
    final String fragId = LlapTezUtils.stripAttemptPrefix(fragmentIdString);
    MDC.put("dagId", dagId);
    MDC.put("queryId", queryId);
    MDC.put("fragmentId", fragId);
    // TODO: Ideally we want tez to use CallableWithMdc that retains the MDC for threads created in
    // thread pool. For now, we will push both dagId and queryId into NDC and the custom thread
    // pool that we use for task execution and llap io (StatsRecordingThreadPool) will pop them
    // using reflection and update the MDC.
    NDC.push(dagId);
    NDC.push(queryId);
    NDC.push(fragId);
    Scheduler.SubmissionState submissionState;
    SubmitWorkResponseProto.Builder responseBuilder = SubmitWorkResponseProto.newBuilder();
    // Everything below runs with the MDC/NDC context set above; the finally block clears both
    // on every exit path (normal return, REJECTED return, or exception).
    try {
        Map<String, String> env = new HashMap<>();
        // TODO What else is required in this environment map.
        env.putAll(localEnv);
        env.put(ApplicationConstants.Environment.USER.name(), vertex.getUser());
        // NOTE(review): fragmentIdString is attemptId.toString(), so this re-parse looks
        // redundant with attemptId above - confirm before simplifying.
        TezTaskAttemptID taskAttemptId = TezTaskAttemptID.fromString(fragmentIdString);
        int dagIdentifier = taskAttemptId.getTaskID().getVertexID().getDAGId().getId();
        QueryIdentifier queryIdentifier = new QueryIdentifier(qIdProto.getApplicationIdString(), dagIdentifier);
        // Deserialize the credentials shipped in the request and pull the session (job) token
        // out of them.
        Credentials credentials = new Credentials();
        DataInputBuffer dib = new DataInputBuffer();
        byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
        dib.reset(tokenBytes, tokenBytes.length);
        credentials.readTokenStorageStream(dib);
        Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
        LlapNodeId amNodeId = LlapNodeId.getInstance(request.getAmHost(), request.getAmPort());
        QueryFragmentInfo fragmentInfo = queryTracker.registerFragment(queryIdentifier, qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId);
        String[] localDirs = fragmentInfo.getLocalDirs();
        Preconditions.checkNotNull(localDirs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Dirs are: " + Arrays.toString(localDirs));
        }
        // May need to setup localDir for re-localization, which is usually setup as Environment.PWD.
        // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
        Configuration callableConf = new Configuration(getConfig());
        // The filesystem UGI is optional: no factory means the callable receives null.
        UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
        TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf, new ExecutionContextImpl(localAddress.get().getHostName()), env, credentials, memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler, this, tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi, completionListener, socketFactory);
        submissionState = executorService.schedule(callable);
        if (LOG.isInfoEnabled()) {
            LOG.info("SubmissionState for {} : {} ", fragmentIdString, submissionState);
        }
        if (submissionState.equals(Scheduler.SubmissionState.REJECTED)) {
            // Stop tracking the fragment and report the rejection to the caller. No exception
            // is thrown here; the response carries only the REJECTED state (no unique node id).
            fragmentComplete(fragmentInfo);
            return responseBuilder.setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
        }
        // Only submissions that were not rejected are counted as handled requests.
        if (metrics != null) {
            metrics.incrExecutorTotalRequestsHandled();
        }
    } finally {
        MDC.clear();
        NDC.clear();
    }
    return responseBuilder.setUniqueNodeId(daemonId.getUniqueNodeIdInCluster()).setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
}
Also used : LlapTokenInfo(org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashMap(java.util.HashMap) ByteString(com.google.protobuf.ByteString) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) NotTezEvent(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Credentials(org.apache.hadoop.security.Credentials) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 3 with SubmitWorkResponseProto

use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.

the class LlapTaskUmbilicalExternalClient method submitWork.

/**
 * Submit the work for actual execution.
 *
 * <p>Registers an empty pending-event list and heartbeat info for the fragment, schedules a
 * heartbeat timeout check, and sends the request to the given LLAP daemon. Submission failures
 * (rejection or RPC error) are reported through {@code responder} when one is set.
 *
 * @param request the work request; its work spec must carry either a vertex or a vertex binary
 * @param llapHost host the request is sent to
 * @param llapPort port the request is sent to
 * @throws RuntimeException if the vertex binary in the request cannot be parsed
 */
public void submitWork(SubmitWorkRequestProto request, String llapHost, int llapPort) {
    // Register the pending events to be sent for this spec.
    VertexOrBinary vob = request.getWorkSpec();
    assert vob.hasVertexBinary() != vob.hasVertex();
    SignableVertexSpec vertex = null;
    try {
        vertex = vob.hasVertex() ? vob.getVertex() : SignableVertexSpec.parseFrom(vob.getVertexBinary());
    } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException(e);
    }
    QueryIdentifierProto queryIdentifierProto = vertex.getQueryIdentifier();
    TezTaskAttemptID attemptId = Converters.createTaskAttemptId(queryIdentifierProto, vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
    final String fragmentId = attemptId.toString();
    final TaskHeartbeatInfo thi = new TaskHeartbeatInfo(fragmentId, llapHost, llapPort);
    pendingEvents.putIfAbsent(fragmentId, new PendingEventData(thi, Lists.<TezEvent>newArrayList()));
    // Setup timer task to check for heartbeat timeouts
    timer.scheduleAtFixedRate(new HeartbeatCheckTask(), connectionTimeout, connectionTimeout, TimeUnit.MILLISECONDS);
    // Send out the actual SubmitWorkRequest
    communicator.sendSubmitWork(request, llapHost, llapPort, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                if (response.getSubmissionState().equals(SubmissionStateProto.REJECTED)) {
                    String msg = "Fragment: " + fragmentId + " rejected. Server Busy.";
                    LOG.info(msg);
                    if (responder != null) {
                        Throwable err = new RuntimeException(msg);
                        responder.submissionFailed(fragmentId, err);
                    }
                    return;
                }
            }
            // Remember which daemon instance accepted the fragment.
            if (response.hasUniqueNodeId()) {
                thi.uniqueNodeId = response.getUniqueNodeId();
            }
        }

        @Override
        public void indicateError(Throwable t) {
            String msg = "Failed to submit: " + fragmentId;
            LOG.error(msg, t);
            // The responder is optional (setResponse guards it the same way), so a missing
            // responder must not turn an RPC failure into a NullPointerException.
            if (responder != null) {
                Throwable err = new RuntimeException(msg, t);
                responder.submissionFailed(fragmentId, err);
            }
        }
    });
}
Also used : InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) VertexOrBinary(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.VertexOrBinary) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 4 with SubmitWorkResponseProto

use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.

the class TestLlapDaemonProtocolServerImpl method testSimpleCall.

/**
 * Starts an LlapProtocolServerImpl backed by a mocked ContainerRunner, issues a submitWork
 * call through LlapProtocolClientImpl, and checks that the ACCEPTED state returned by the mock
 * comes back to the client.
 */
@Test(timeout = 10000)
public void testSimpleCall() throws ServiceException, IOException {
    LlapDaemonConfiguration daemonConf = new LlapDaemonConfiguration();
    int numHandlers = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_RPC_NUM_HANDLERS);
    ContainerRunner containerRunnerMock = mock(ContainerRunner.class);
    LlapProtocolServerImpl server = new LlapProtocolServerImpl(null, numHandlers, containerRunnerMock, new AtomicReference<InetSocketAddress>(), new AtomicReference<InetSocketAddress>(), 0, 0, null);
    // Every submitWork call on the mocked runner answers with ACCEPTED.
    when(containerRunnerMock.submitWork(any(SubmitWorkRequestProto.class))).thenReturn(SubmitWorkResponseProto.newBuilder().setSubmissionState(SubmissionStateProto.ACCEPTED).build());
    try {
        server.init(new Configuration());
        server.start();
        InetSocketAddress serverAddr = server.getBindAddress();
        LlapProtocolBlockingPB client = new LlapProtocolClientImpl(new Configuration(), serverAddr.getHostName(), serverAddr.getPort(), null, null, null);
        SubmitWorkResponseProto responseProto = client.submitWork(null, SubmitWorkRequestProto.newBuilder().setAmHost("amhost").setAmPort(2000).build());
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
        // failure message label the two values correctly.
        assertEquals(SubmissionStateProto.ACCEPTED, responseProto.getSubmissionState());
    } finally {
        // Always stop the server so the bound port is released even when the test fails.
        server.stop();
    }
}
Also used : LlapDaemonConfiguration(org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration) ContainerRunner(org.apache.hadoop.hive.llap.daemon.ContainerRunner) LlapDaemonConfiguration(org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration) Configuration(org.apache.hadoop.conf.Configuration) InetSocketAddress(java.net.InetSocketAddress) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) LlapProtocolClientImpl(org.apache.hadoop.hive.llap.impl.LlapProtocolClientImpl) LlapProtocolBlockingPB(org.apache.hadoop.hive.llap.protocol.LlapProtocolBlockingPB) Test(org.junit.Test)

Aggregations

SubmitWorkResponseProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto)4 ByteString (com.google.protobuf.ByteString)2 Configuration (org.apache.hadoop.conf.Configuration)2 LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId)2 QueryIdentifierProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto)2 SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec)2 SubmitWorkRequestProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto)2 LlapProtocolClientProxy (org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy)2 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)2 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 ServiceException (com.google.protobuf.ServiceException)1 IOException (java.io.IOException)1 InetSocketAddress (java.net.InetSocketAddress)1 HashMap (java.util.HashMap)1 LlapDaemonConfiguration (org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration)1 ContainerRunner (org.apache.hadoop.hive.llap.daemon.ContainerRunner)1 LlapTokenInfo (org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo)1 FragmentRuntimeInfo (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo)1 NotTezEvent (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent)1