Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.
The class LlapTaskCommunicator, method registerRunningTaskAttempt.
@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
    Map<String, LocalResource> additionalResources, Credentials credentials,
    boolean credentialsChanged, int priority) {
  super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
      credentialsChanged, priority);
  int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
  if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
    // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
    // once TEZ-2672 is fixed.
    String hiveQueryId;
    try {
      hiveQueryId = extractQueryId(taskSpec);
    } catch (IOException e) {
      throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
    }
    Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
    resetCurrentDag(dagId, hiveQueryId);
  }
  ContainerInfo containerInfo = getContainerInfo(containerId);
  String host;
  int port;
  if (containerInfo != null) {
    synchronized (containerInfo) {
      host = containerInfo.host;
      port = containerInfo.port;
    }
  } else {
    // TODO Handle this properly
    throw new RuntimeException("ContainerInfo not found for container: " + containerId
        + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
  }
  LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
  registerKnownNode(nodeId);
  entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
  nodesForQuery.add(nodeId);
  sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
  FragmentRuntimeInfo fragmentRuntimeInfo;
  try {
    fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(),
        taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
  } catch (Exception e) {
    LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={},"
        + " currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId,
        currentQueryIdentifierProto, currentHiveQueryId, e);
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    } else {
      throw new RuntimeException(e);
    }
  }
  SubmitWorkRequestProto requestProto;
  try {
    requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo,
        currentHiveQueryId);
  } catch (IOException e) {
    throw new RuntimeException("Failed to construct request", e);
  }
  // Have to register this up front right now. Otherwise, it's possible for the task to start
  // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
  getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
  communicator.sendSubmitWork(requestProto, host, port,
      new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
          if (response.hasSubmissionState()) {
            LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
            if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
              LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: "
                  + containerId + ", Service Busy");
              getContext().taskKilled(taskSpec.getTaskAttemptID(),
                  TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
              return;
            }
          } else {
            // This should never happen as the server always returns a valid status on success.
            throw new RuntimeException("SubmissionState in response is expected!");
          }
          if (response.hasUniqueNodeId()) {
            entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(),
                response.getUniqueNodeId());
          }
          LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
          Throwable originalError = t;
          if (t instanceof ServiceException) {
            ServiceException se = (ServiceException) t;
            t = se.getCause();
          }
          if (t instanceof RemoteException) {
            // All others from the remote service cause the task to FAIL.
            LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: "
                + containerId, t);
            processSendError(originalError);
            getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL,
                TaskAttemptEndReason.OTHER, t.toString());
          } else {
            // Exception from the RPC layer - communication failure, consider as KILLED / service down.
            if (t instanceof IOException) {
              LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: "
                  + containerId + ", Communication Error");
              processSendError(originalError);
              getContext().taskKilled(taskSpec.getTaskAttemptID(),
                  TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
            } else {
              // Anything else is a FAIL.
              LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: "
                  + containerId, t);
              processSendError(originalError);
              getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL,
                  TaskAttemptEndReason.OTHER, t.getMessage());
            }
          }
        }
      });
}
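The extractQueryId call near the top of this method is the workaround flagged by the TODO: the Hive query id is recovered by deserializing the Tez processor's user payload back into a Configuration and reading the query-id variable from it. A minimal sketch of that extraction, assuming the payload was produced from a HiveConf on the submitting side (the wrapper class name is illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.runtime.api.impl.TaskSpec;

// Illustrative helper: recovers the Hive query id from a TaskSpec by parsing
// the processor payload, as the TODO above describes.
final class QueryIdExtraction {
  static String extractQueryId(TaskSpec taskSpec) throws IOException {
    UserPayload processorPayload = taskSpec.getProcessorDescriptor().getUserPayload();
    // The payload is a serialized Configuration; this is the "ugly" parsing step.
    Configuration conf = TezUtils.createConfFromUserPayload(processorPayload);
    return HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
  }
}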
Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.
The class ContainerRunnerImpl, method submitWork.
@Override
public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException {
  LlapTokenInfo tokenInfo = null;
  try {
    tokenInfo = LlapTokenChecker.getTokenInfo(clusterId);
  } catch (SecurityException ex) {
    logSecurityErrorRarely(null);
    throw ex;
  }
  SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo);
  TezEvent initialEvent = extractInitialEvent(request, tokenInfo);
  TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(),
      vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
  String fragmentIdString = attemptId.toString();
  if (LOG.isInfoEnabled()) {
    LOG.info("Queueing container for execution: fragmentId={}, {}", fragmentIdString,
        stringifySubmitRequest(request, vertex));
  }
  QueryIdentifierProto qIdProto = vertex.getQueryIdentifier();
  HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(),
      request.getContainerIdString(), localAddress.get().getHostName(),
      constructUniqueQueryId(vertex.getHiveQueryId(), qIdProto.getDagIndex()),
      qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(),
      request.getAttemptNumber());
  // This is the start of container-annotated logging.
  final String dagId = attemptId.getTaskID().getVertexID().getDAGId().toString();
  final String queryId = vertex.getHiveQueryId();
  final String fragId = LlapTezUtils.stripAttemptPrefix(fragmentIdString);
  MDC.put("dagId", dagId);
  MDC.put("queryId", queryId);
  MDC.put("fragmentId", fragId);
  // TODO: Ideally we want tez to use CallableWithMdc that retains the MDC for threads created in
  // thread pool. For now, we will push both dagId and queryId into NDC and the custom thread
  // pool that we use for task execution and llap io (StatsRecordingThreadPool) will pop them
  // using reflection and update the MDC.
  NDC.push(dagId);
  NDC.push(queryId);
  NDC.push(fragId);
  Scheduler.SubmissionState submissionState;
  SubmitWorkResponseProto.Builder responseBuilder = SubmitWorkResponseProto.newBuilder();
  try {
    Map<String, String> env = new HashMap<>();
    // TODO What else is required in this environment map.
    env.putAll(localEnv);
    env.put(ApplicationConstants.Environment.USER.name(), vertex.getUser());
    TezTaskAttemptID taskAttemptId = TezTaskAttemptID.fromString(fragmentIdString);
    int dagIdentifier = taskAttemptId.getTaskID().getVertexID().getDAGId().getId();
    QueryIdentifier queryIdentifier =
        new QueryIdentifier(qIdProto.getApplicationIdString(), dagIdentifier);
    Credentials credentials = new Credentials();
    DataInputBuffer dib = new DataInputBuffer();
    byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
    dib.reset(tokenBytes, tokenBytes.length);
    credentials.readTokenStorageStream(dib);
    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    LlapNodeId amNodeId = LlapNodeId.getInstance(request.getAmHost(), request.getAmPort());
    QueryFragmentInfo fragmentInfo = queryTracker.registerFragment(queryIdentifier,
        qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(),
        dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(),
        request.getAttemptNumber(), vertex.getUser(), vertex, jobToken, fragmentIdString,
        tokenInfo, amNodeId);
    String[] localDirs = fragmentInfo.getLocalDirs();
    Preconditions.checkNotNull(localDirs);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Dirs are: " + Arrays.toString(localDirs));
    }
    // May need to set up localDir for re-localization, which is usually set up as Environment.PWD.
    // Used for re-localization, to add the user-specified configuration (conf_pb_binary_stream).
    Configuration callableConf = new Configuration(getConfig());
    UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
    TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf,
        new ExecutionContextImpl(localAddress.get().getHostName()), env, credentials,
        memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler, this,
        tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi, completionListener,
        socketFactory);
    submissionState = executorService.schedule(callable);
    if (LOG.isInfoEnabled()) {
      LOG.info("SubmissionState for {} : {} ", fragmentIdString, submissionState);
    }
    if (submissionState.equals(Scheduler.SubmissionState.REJECTED)) {
      // Stop tracking the fragment and report the rejection to the caller.
      fragmentComplete(fragmentInfo);
      return responseBuilder
          .setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
    }
    if (metrics != null) {
      metrics.incrExecutorTotalRequestsHandled();
    }
  } finally {
    MDC.clear();
    NDC.clear();
  }
  return responseBuilder.setUniqueNodeId(daemonId.getUniqueNodeIdInCluster())
      .setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
}
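The return paths above define the response contract that the callers elsewhere in this page check for: a REJECTED response is built without the unique node id (the early return), while an admitted fragment gets both the submission state and the daemon's unique node id. A small hedged sketch of how a caller can interpret that contract (the wrapper class name is illustrative):

import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmissionStateProto;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto;

// Illustrative helper: interprets the submitWork response contract shown above.
final class SubmissionResponses {
  static boolean wasAdmitted(SubmitWorkResponseProto response) {
    if (!response.hasSubmissionState()) {
      // submitWork always sets a state on success; treat its absence as a
      // protocol error, as LlapTaskCommunicator does.
      throw new IllegalStateException("SubmissionState in response is expected!");
    }
    return response.getSubmissionState() != SubmissionStateProto.REJECTED;
  }
}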
Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.
The class LlapTaskUmbilicalExternalClient, method submitWork.
/**
 * Submit the work for actual execution.
 */
public void submitWork(SubmitWorkRequestProto request, String llapHost, int llapPort) {
  // Register the pending events to be sent for this spec.
  VertexOrBinary vob = request.getWorkSpec();
  assert vob.hasVertexBinary() != vob.hasVertex();
  SignableVertexSpec vertex = null;
  try {
    vertex = vob.hasVertex() ? vob.getVertex()
        : SignableVertexSpec.parseFrom(vob.getVertexBinary());
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(e);
  }
  QueryIdentifierProto queryIdentifierProto = vertex.getQueryIdentifier();
  TezTaskAttemptID attemptId = Converters.createTaskAttemptId(queryIdentifierProto,
      vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
  final String fragmentId = attemptId.toString();
  final TaskHeartbeatInfo thi = new TaskHeartbeatInfo(fragmentId, llapHost, llapPort);
  pendingEvents.putIfAbsent(fragmentId, new PendingEventData(thi, Lists.<TezEvent>newArrayList()));
  // Set up a timer task to check for heartbeat timeouts.
  timer.scheduleAtFixedRate(new HeartbeatCheckTask(), connectionTimeout, connectionTimeout,
      TimeUnit.MILLISECONDS);
  // Send out the actual SubmitWorkRequest.
  communicator.sendSubmitWork(request, llapHost, llapPort,
      new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
          if (response.hasSubmissionState()) {
            if (response.getSubmissionState().equals(SubmissionStateProto.REJECTED)) {
              String msg = "Fragment: " + fragmentId + " rejected. Server Busy.";
              LOG.info(msg);
              if (responder != null) {
                Throwable err = new RuntimeException(msg);
                responder.submissionFailed(fragmentId, err);
              }
              return;
            }
          }
          if (response.hasUniqueNodeId()) {
            thi.uniqueNodeId = response.getUniqueNodeId();
          }
        }

        @Override
        public void indicateError(Throwable t) {
          String msg = "Failed to submit: " + fragmentId;
          LOG.error(msg, t);
          Throwable err = new RuntimeException(msg, t);
          responder.submissionFailed(fragmentId, err);
        }
      });
}
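The assert at the top of this method encodes an invariant of VertexOrBinary: the work spec carries either an inline SignableVertexSpec or its serialized bytes (the form whose signature the daemon can verify), never both. A hedged sketch of producing the two forms (the wrapper class name is illustrative):

import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.VertexOrBinary;

// Illustrative helper: builds a work spec in one of its two mutually exclusive forms.
final class WorkSpecs {
  static VertexOrBinary toWorkSpec(SignableVertexSpec vertexSpec, boolean asBinary) {
    return asBinary
        // Binary form: the serialized bytes can be signed and later verified.
        ? VertexOrBinary.newBuilder().setVertexBinary(vertexSpec.toByteString()).build()
        // Inline form: used when no signature is involved.
        : VertexOrBinary.newBuilder().setVertex(vertexSpec).build();
  }
}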
Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto in project hive by apache.
The class TestLlapDaemonProtocolServerImpl, method testSimpleCall.
@Test(timeout = 10000)
public void testSimpleCall() throws ServiceException, IOException {
  LlapDaemonConfiguration daemonConf = new LlapDaemonConfiguration();
  int numHandlers = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_RPC_NUM_HANDLERS);
  ContainerRunner containerRunnerMock = mock(ContainerRunner.class);
  LlapProtocolServerImpl server = new LlapProtocolServerImpl(null, numHandlers,
      containerRunnerMock, new AtomicReference<InetSocketAddress>(),
      new AtomicReference<InetSocketAddress>(), 0, 0, null);
  when(containerRunnerMock.submitWork(any(SubmitWorkRequestProto.class))).thenReturn(
      SubmitWorkResponseProto.newBuilder()
          .setSubmissionState(SubmissionStateProto.ACCEPTED).build());
  try {
    server.init(new Configuration());
    server.start();
    InetSocketAddress serverAddr = server.getBindAddress();
    LlapProtocolBlockingPB client = new LlapProtocolClientImpl(new Configuration(),
        serverAddr.getHostName(), serverAddr.getPort(), null, null, null);
    SubmitWorkResponseProto responseProto = client.submitWork(null,
        SubmitWorkRequestProto.newBuilder().setAmHost("amhost").setAmPort(2000).build());
    assertEquals(SubmissionStateProto.ACCEPTED.name(), responseProto.getSubmissionState().name());
  } finally {
    server.stop();
  }
}
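The same mock/server/client setup can exercise the rejection path by stubbing a REJECTED response instead; a hedged sketch of the companion assertion, reusing the variable names from testSimpleCall above:

// Sketch: companion check for the REJECTED path, assuming the same server,
// client, and containerRunnerMock setup as in testSimpleCall.
when(containerRunnerMock.submitWork(any(SubmitWorkRequestProto.class))).thenReturn(
    SubmitWorkResponseProto.newBuilder()
        .setSubmissionState(SubmissionStateProto.REJECTED).build());
SubmitWorkResponseProto rejected = client.submitWork(null,
    SubmitWorkRequestProto.newBuilder().setAmHost("amhost").setAmPort(2000).build());
assertEquals(SubmissionStateProto.REJECTED.name(), rejected.getSubmissionState().name());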