use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
the class TestFirstInFirstOutComparator method createRequest.
/**
 * Builds a mock {@link SubmitWorkRequestProto} for use in comparator tests.
 *
 * <p>The application id, DAG id and vertex id are fixed; only the values passed in as
 * parameters vary between requests. The attempt number is always 0.
 *
 * @param fragmentNumber fragment number set on the request
 * @param numSelfAndUpstreamTasks value for the runtime info's self-and-upstream task count
 * @param numSelfAndUpstreamComplete value for the runtime info's self-and-upstream completed task count
 * @param dagStartTime value for the runtime info's DAG start time
 * @param attemptStartTime value for the runtime info's first attempt start time
 * @param withinDagPriority value for the runtime info's within-DAG priority
 * @param dagName used both as the DAG name and as the Hive query id of the vertex spec
 * @return the fully built request
 */
private SubmitWorkRequestProto createRequest(int fragmentNumber, int numSelfAndUpstreamTasks, int numSelfAndUpstreamComplete, int dagStartTime, int attemptStartTime, int withinDagPriority, String dagName) {
  ApplicationId appId = ApplicationId.newInstance(9999, 72);
  TezDAGID dagId = TezDAGID.getInstance(appId, 1);
  TezVertexID vId = TezVertexID.getInstance(dagId, 35);

  QueryIdentifierProto queryIdentifier = QueryIdentifierProto.newBuilder()
      .setApplicationIdString(appId.toString())
      .setAppAttemptNumber(0)
      .setDagIndex(dagId.getId())
      .build();

  SignableVertexSpec vertexSpec = SignableVertexSpec.newBuilder()
      .setQueryIdentifier(queryIdentifier)
      .setVertexIndex(vId.getId())
      .setDagName(dagName)
      .setHiveQueryId(dagName)
      .setVertexName("MockVertex")
      .setUser("MockUser")
      .setTokenIdentifier("MockToken_1")
      .setProcessorDescriptor(EntityDescriptorProto.newBuilder().setClassName("MockProcessor").build())
      .build();

  LlapDaemonProtocolProtos.FragmentRuntimeInfo runtimeInfo = LlapDaemonProtocolProtos.FragmentRuntimeInfo.newBuilder()
      .setDagStartTime(dagStartTime)
      .setFirstAttemptStartTime(attemptStartTime)
      .setNumSelfAndUpstreamTasks(numSelfAndUpstreamTasks)
      .setNumSelfAndUpstreamCompletedTasks(numSelfAndUpstreamComplete)
      .setWithinDagPriority(withinDagPriority)
      .build();

  return SubmitWorkRequestProto.newBuilder()
      .setAttemptNumber(0)
      .setFragmentNumber(fragmentNumber)
      .setWorkSpec(VertexOrBinary.newBuilder().setVertex(vertexSpec).build())
      .setAmHost("localhost")
      .setAmPort(12345)
      .setContainerIdString("MockContainer_1")
      .setFragmentRuntimeInfo(runtimeInfo)
      .build();
}
use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
the class ContainerRunnerImpl method submitWork.
/**
 * Accepts a fragment submission: validates the caller's token, registers the fragment with the
 * query tracker, wraps it in a {@link TaskRunnerCallable} and hands it to the executor service.
 *
 * <p>If the scheduler rejects the fragment, the fragment is un-tracked again and a response
 * carrying only the REJECTED submission state is returned. Otherwise the response also carries
 * this daemon's unique node id.
 *
 * @param request the work submission request
 * @return the response describing the resulting submission state
 * @throws IOException declared by the interface; may be raised by the calls made here
 * @throws SecurityException rethrown if the token info cannot be obtained
 */
@Override
public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException {
  LlapTokenInfo tokenInfo = null;
  try {
    tokenInfo = LlapTokenChecker.getTokenInfo(clusterId);
  } catch (SecurityException ex) {
    logSecurityErrorRarely(null);
    throw ex;
  }
  SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo);
  TezEvent initialEvent = extractInitialEvent(request, tokenInfo);
  TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
  String fragmentIdString = attemptId.toString();
  if (LOG.isInfoEnabled()) {
    LOG.info("Queueing container for execution: fragemendId={}, {}", fragmentIdString, stringifySubmitRequest(request, vertex));
  }
  QueryIdentifierProto qIdProto = vertex.getQueryIdentifier();
  HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(), request.getContainerIdString(), localAddress.get().getHostName(), constructUniqueQueryId(vertex.getHiveQueryId(), qIdProto.getDagIndex()), qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber());
  // This is the start of container-annotated logging.
  final String dagId = attemptId.getTaskID().getVertexID().getDAGId().toString();
  final String queryId = vertex.getHiveQueryId();
  final String fragId = LlapTezUtils.stripAttemptPrefix(fragmentIdString);
  MDC.put("dagId", dagId);
  MDC.put("queryId", queryId);
  MDC.put("fragmentId", fragId);
  // TODO: Ideally we want tez to use CallableWithMdc that retains the MDC for threads created in
  // thread pool. For now, we will push both dagId and queryId into NDC and the custom thread
  // pool that we use for task execution and llap io (StatsRecordingThreadPool) will pop them
  // using reflection and update the MDC.
  NDC.push(dagId);
  NDC.push(queryId);
  NDC.push(fragId);
  Scheduler.SubmissionState submissionState;
  SubmitWorkResponseProto.Builder responseBuilder = SubmitWorkResponseProto.newBuilder();
  try {
    Map<String, String> env = new HashMap<>();
    // TODO What else is required in this environment map.
    env.putAll(localEnv);
    env.put(ApplicationConstants.Environment.USER.name(), vertex.getUser());
    // Read the DAG index straight from the attempt id built above; re-parsing its string form
    // would only reconstruct the same identifier.
    int dagIdentifier = attemptId.getTaskID().getVertexID().getDAGId().getId();
    QueryIdentifier queryIdentifier = new QueryIdentifier(qIdProto.getApplicationIdString(), dagIdentifier);
    // Deserialize the credentials shipped with the request and pull out the job token.
    Credentials credentials = new Credentials();
    DataInputBuffer dib = new DataInputBuffer();
    byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
    dib.reset(tokenBytes, tokenBytes.length);
    credentials.readTokenStorageStream(dib);
    Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
    LlapNodeId amNodeId = LlapNodeId.getInstance(request.getAmHost(), request.getAmPort());
    QueryFragmentInfo fragmentInfo = queryTracker.registerFragment(queryIdentifier, qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId);
    String[] localDirs = fragmentInfo.getLocalDirs();
    Preconditions.checkNotNull(localDirs);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Dirs are: " + Arrays.toString(localDirs));
    }
    // May need to setup localDir for re-localization, which is usually setup as Environment.PWD.
    // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
    Configuration callableConf = new Configuration(getConfig());
    UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
    TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf, new ExecutionContextImpl(localAddress.get().getHostName()), env, credentials, memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler, this, tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi, completionListener, socketFactory);
    submissionState = executorService.schedule(callable);
    if (LOG.isInfoEnabled()) {
      LOG.info("SubmissionState for {} : {} ", fragmentIdString, submissionState);
    }
    if (submissionState.equals(Scheduler.SubmissionState.REJECTED)) {
      // Stop tracking the fragment and report the rejection to the caller. Note that the
      // rejection response does not carry the unique node id.
      fragmentComplete(fragmentInfo);
      return responseBuilder.setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
    }
    if (metrics != null) {
      metrics.incrExecutorTotalRequestsHandled();
    }
  } finally {
    // Always drop the logging context set up above, on success, rejection or failure.
    MDC.clear();
    NDC.clear();
  }
  return responseBuilder.setUniqueNodeId(daemonId.getUniqueNodeIdInCluster()).setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
}
use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
the class LlapTaskUmbilicalExternalClient method submitWork.
/**
 * Submit the work for actual execution.
 *
 * <p>Registers pending-event bookkeeping for the fragment, schedules a heartbeat timeout check,
 * and sends the request to the given LLAP daemon. Rejections and submission errors are reported
 * through {@code responder} when one is set.
 *
 * @param request the work submission request; its work spec carries either a vertex or a
 *     serialized vertex binary
 * @param llapHost host of the LLAP daemon to submit to
 * @param llapPort port of the LLAP daemon to submit to
 * @throws RuntimeException wrapping an {@link InvalidProtocolBufferException} if the serialized
 *     vertex cannot be parsed
 */
public void submitWork(SubmitWorkRequestProto request, String llapHost, int llapPort) {
  // Register the pending events to be sent for this spec.
  VertexOrBinary vob = request.getWorkSpec();
  assert vob.hasVertexBinary() != vob.hasVertex();
  final SignableVertexSpec vertex;
  try {
    vertex = vob.hasVertex() ? vob.getVertex() : SignableVertexSpec.parseFrom(vob.getVertexBinary());
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(e);
  }
  QueryIdentifierProto queryIdentifierProto = vertex.getQueryIdentifier();
  TezTaskAttemptID attemptId = Converters.createTaskAttemptId(queryIdentifierProto, vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
  final String fragmentId = attemptId.toString();
  final TaskHeartbeatInfo thi = new TaskHeartbeatInfo(fragmentId, llapHost, llapPort);
  pendingEvents.putIfAbsent(fragmentId, new PendingEventData(thi, Lists.<TezEvent>newArrayList()));
  // Setup timer task to check for heartbeat timeouts
  timer.scheduleAtFixedRate(new HeartbeatCheckTask(), connectionTimeout, connectionTimeout, TimeUnit.MILLISECONDS);
  // Send out the actual SubmitWorkRequest
  communicator.sendSubmitWork(request, llapHost, llapPort, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {
    @Override
    public void setResponse(SubmitWorkResponseProto response) {
      if (response.hasSubmissionState()) {
        if (response.getSubmissionState().equals(SubmissionStateProto.REJECTED)) {
          String msg = "Fragment: " + fragmentId + " rejected. Server Busy.";
          LOG.info(msg);
          if (responder != null) {
            Throwable err = new RuntimeException(msg);
            responder.submissionFailed(fragmentId, err);
          }
          return;
        }
      }
      // Remember which daemon instance accepted the fragment.
      if (response.hasUniqueNodeId()) {
        thi.uniqueNodeId = response.getUniqueNodeId();
      }
    }
    @Override
    public void indicateError(Throwable t) {
      String msg = "Failed to submit: " + fragmentId;
      LOG.error(msg, t);
      // Same guard as the rejection path: a missing responder must not turn an already
      // logged submission failure into a NullPointerException inside the callback.
      if (responder != null) {
        Throwable err = new RuntimeException(msg, t);
        responder.submissionFailed(fragmentId, err);
      }
    }
  });
}
use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
the class LlapTaskUmbilicalExternalClient method updateHeartbeatInfo.
/**
 * Refreshes the last-heartbeat timestamp of every pending or registered task that is tracked
 * on the reporting node and that the node lists in its heartbeat.
 *
 * <p>Tasks tracked on the node but absent from the heartbeat are collected and logged. A
 * separate message is logged when no task at all was refreshed.
 *
 * @param hostname hostname of the node that sent the heartbeat
 * @param uniqueId unique id of the node that sent the heartbeat
 * @param port port of the node that sent the heartbeat
 * @param tasks the task attempts the node reports
 */
private void updateHeartbeatInfo(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
  HashSet<TezTaskAttemptID> attempts = new HashSet<>();
  for (Writable w : tasks.get()) {
    attempts.add((TezTaskAttemptID) w);
  }

  int updateCount = 0;
  // Accumulates "<attemptId>, " for every expected-but-unreported task.
  StringBuilder missing = new StringBuilder();

  for (PendingEventData pendingEventData : pendingEvents.values()) {
    if (pendingEventData != null
        && refreshHeartbeatIfOnNode(pendingEventData.heartbeatInfo, hostname, uniqueId, port, attempts, missing)) {
      updateCount++;
    }
  }
  for (TaskHeartbeatInfo thi : registeredTasks.values()) {
    if (refreshHeartbeatIfOnNode(thi, hostname, uniqueId, port, attempts, missing)) {
      updateCount++;
    }
  }

  if (missing.length() > 0) {
    LOG.info("The tasks we expected to be on the node are not there: " + missing);
  }
  if (updateCount == 0) {
    LOG.info("No tasks found for heartbeat from hostname " + hostname + ", port " + port);
  }
}

/**
 * Updates one task's heartbeat if the task is tracked on the given node and is in the reported set.
 *
 * @param thi heartbeat info of the task; {@code null} is treated as "not on this node"
 * @param hostname hostname of the reporting node
 * @param uniqueId unique id of the reporting node
 * @param port port of the reporting node
 * @param attempts task attempts reported by the node
 * @param missing receives {@code "<attemptId>, "} when the task is on the node but unreported
 * @return {@code true} if the task's last-heartbeat time was refreshed
 */
private boolean refreshHeartbeatIfOnNode(TaskHeartbeatInfo thi, String hostname, String uniqueId, int port, HashSet<TezTaskAttemptID> attempts, StringBuilder missing) {
  if (thi == null) {
    return false;
  }
  // Read once: the field is assigned from the submit-response callback.
  String thiUniqueId = thi.uniqueNodeId;
  boolean onNode = thi.hostname.equals(hostname) && thi.port == port
      && thiUniqueId != null && thiUniqueId.equals(uniqueId);
  if (!onNode) {
    return false;
  }
  if (attempts.contains(TezTaskAttemptID.fromString(thi.taskAttemptId))) {
    thi.lastHeartbeat.set(System.currentTimeMillis());
    return true;
  }
  missing.append(thi.taskAttemptId).append(", ");
  return false;
}
use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
the class TaskRunnerCallable method callInternal.
/**
 * Runs the fragment: unregisters it from the AM reporter, sets up the executor, credentials,
 * umbilical proxy and task reporter, then runs a {@link TezTaskRunner2} to completion.
 *
 * <p>If the task was marked as not runnable before it starts, a result with
 * {@code EndReason.KILL_REQUESTED} is returned without running anything.
 *
 * @return the result of the task run, or a KILL_REQUESTED result if the task was killed earlier
 * @throws Exception if setup or execution of the task fails
 */
@Override
protected TaskRunner2Result callInternal() throws Exception {
  setMDCFromNDC();
  try {
    isStarted.set(true);
    this.startTime = System.currentTimeMillis();
    this.threadName = Thread.currentThread().getName();
    if (LOG.isDebugEnabled()) {
      LOG.debug("canFinish: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
    }
    // Unregister from the AMReporter, since the task is now running.
    TezTaskAttemptID ta = taskSpec.getTaskAttemptID();
    this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort(), fragmentInfo.getQueryInfo().getQueryIdentifier(), ta);
    // First kill check: bail out before any resources are set up.
    synchronized (this) {
      if (!shouldRunTask) {
        LOG.info("Not starting task {} since it was killed earlier", ta);
        return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, null, false);
      }
    }
    // TODO This executor seems unnecessary. Here and TezChild
    executor = new StatsRecordingThreadPool(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTR-" + threadNameSuffix).build());
    // TODO Consolidate this code with TezChild.
    runtimeWatch.start();
    // Fall back to a remote user for the vertex's user when no filesystem UGI was supplied.
    if (fsTaskUgi == null) {
      fsTaskUgi = UserGroupInformation.createRemoteUser(vertex.getUser());
    }
    fsTaskUgi.addCredentials(credentials);
    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>();
    serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID, TezCommonUtils.convertJobTokenToBytes(jobToken));
    Multimap<String, String> startedInputsMap = createStartedInputMap(vertex);
    final UserGroupInformation taskOwner = fragmentInfo.getQueryInfo().getUmbilicalUgi();
    if (LOG.isDebugEnabled()) {
      LOG.debug("taskOwner hashCode:" + taskOwner.hashCode());
    }
    final InetSocketAddress address = NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
    // Create the umbilical proxy to the AM while running as the task owner.
    umbilical = taskOwner.doAs(new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() {
      @Override
      public LlapTaskUmbilicalProtocol run() throws Exception {
        return RPC.getProxy(LlapTaskUmbilicalProtocol.class, LlapTaskUmbilicalProtocol.versionID, address, taskOwner, conf, socketFactory);
      }
    });
    String fragmentId = LlapTezUtils.stripAttemptPrefix(taskSpec.getTaskAttemptID().toString());
    taskReporter = new LlapTaskReporter(completionListener, umbilical, confParams.amHeartbeatIntervalMsMax, confParams.amCounterHeartbeatInterval, confParams.amMaxEventsPerHeartbeat, new AtomicLong(0), request.getContainerIdString(), fragmentId, initialEvent, requestId);
    String attemptId = fragmentInfo.getFragmentIdentifierString();
    IOContextMap.setThreadAttemptId(attemptId);
    try {
      // Second kill check: only create the runner if the task is still supposed to run.
      synchronized (this) {
        if (shouldRunTask) {
          taskRunner = new TezTaskRunner2(conf, fsTaskUgi, fragmentInfo.getLocalDirs(), taskSpec, vertex.getQueryIdentifier().getAppAttemptNumber(), serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry, pid, executionContext, memoryAvailable, false, tezHadoopShim);
        }
      }
      if (taskRunner == null) {
        LOG.info("Not starting task {} since it was killed earlier", taskSpec.getTaskAttemptID());
        return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, null, false);
      }
      try {
        TaskRunner2Result result = taskRunner.run();
        if (result.isContainerShutdownRequested()) {
          LOG.warn("Unexpected container shutdown requested while running task. Ignoring");
        }
        isCompleted.set(true);
        return result;
      } finally {
        // Runs whether the task succeeded or threw: close the task's filesystems and log runtime.
        FileSystem.closeAllForUGI(fsTaskUgi);
        LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" + runtimeWatch.stop().elapsedMillis());
        if (LOG.isDebugEnabled()) {
          LOG.debug("canFinish post completion: " + taskSpec.getTaskAttemptID() + ": " + canFinish());
        }
      }
    } finally {
      IOContextMap.clearThreadAttempt(attemptId);
    }
  } finally {
    MDC.clear();
  }
}
Aggregations