Search in sources :

Example 11 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TestFirstInFirstOutComparator method createRequest.

/**
 * Builds a mock {@link SubmitWorkRequestProto} for comparator tests.
 *
 * <p>The application, DAG, vertex and task identifiers are fixed; only the fragment number,
 * the runtime-info counters/timestamps, the within-DAG priority and the DAG name (which is
 * also used as the Hive query id) vary per call.
 *
 * @param fragmentNumber fragment number stored in the request
 * @param numSelfAndUpstreamTasks value for the runtime info's self-and-upstream task count
 * @param numSelfAndUpstreamComplete value for the runtime info's completed task count
 * @param dagStartTime value for the runtime info's DAG start time
 * @param attemptStartTime value for the runtime info's first attempt start time
 * @param withinDagPriority value for the runtime info's within-DAG priority
 * @param dagName DAG name; also set as the Hive query id
 * @return the fully built request
 */
private SubmitWorkRequestProto createRequest(int fragmentNumber, int numSelfAndUpstreamTasks, int numSelfAndUpstreamComplete, int dagStartTime, int attemptStartTime, int withinDagPriority, String dagName) {
    // Fixed identifier hierarchy: application -> DAG -> vertex -> task -> attempt.
    ApplicationId applicationId = ApplicationId.newInstance(9999, 72);
    TezDAGID dag = TezDAGID.getInstance(applicationId, 1);
    TezVertexID vertexId = TezVertexID.getInstance(dag, 35);
    TezTaskID taskId = TezTaskID.getInstance(vertexId, 389);
    // The attempt ID is constructed but not referenced by the request below.
    TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, fragmentNumber);

    QueryIdentifierProto queryIdentifier = QueryIdentifierProto.newBuilder()
        .setApplicationIdString(applicationId.toString())
        .setAppAttemptNumber(0)
        .setDagIndex(dag.getId())
        .build();

    EntityDescriptorProto processor = EntityDescriptorProto.newBuilder()
        .setClassName("MockProcessor")
        .build();

    SignableVertexSpec vertexSpec = SignableVertexSpec.newBuilder()
        .setQueryIdentifier(queryIdentifier)
        .setVertexIndex(vertexId.getId())
        .setDagName(dagName)
        .setHiveQueryId(dagName)
        .setVertexName("MockVertex")
        .setUser("MockUser")
        .setTokenIdentifier("MockToken_1")
        .setProcessorDescriptor(processor)
        .build();

    VertexOrBinary workSpec = VertexOrBinary.newBuilder()
        .setVertex(vertexSpec)
        .build();

    LlapDaemonProtocolProtos.FragmentRuntimeInfo runtimeInfo = LlapDaemonProtocolProtos.FragmentRuntimeInfo.newBuilder()
        .setDagStartTime(dagStartTime)
        .setFirstAttemptStartTime(attemptStartTime)
        .setNumSelfAndUpstreamTasks(numSelfAndUpstreamTasks)
        .setNumSelfAndUpstreamCompletedTasks(numSelfAndUpstreamComplete)
        .setWithinDagPriority(withinDagPriority)
        .build();

    return SubmitWorkRequestProto.newBuilder()
        .setAttemptNumber(0)
        .setFragmentNumber(fragmentNumber)
        .setWorkSpec(workSpec)
        .setAmHost("localhost")
        .setAmPort(12345)
        .setContainerIdString("MockContainer_1")
        .setFragmentRuntimeInfo(runtimeInfo)
        .build();
}
Also used : TezDAGID(org.apache.tez.dag.records.TezDAGID) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskID(org.apache.tez.dag.records.TezTaskID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 12 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class ContainerRunnerImpl method submitWork.

/**
 * Validates, registers and schedules a submitted fragment for execution.
 *
 * <p>Token information is looked up first; a {@link SecurityException} from that lookup is
 * logged (rate-limited by {@code logSecurityErrorRarely}) and re-thrown. The dag id, query id
 * and fragment id are then placed into both the MDC and the NDC for the duration of the call
 * and cleared in a {@code finally} block.
 *
 * @param request the work submission request
 * @return a response carrying the submission state; the unique node id is set only when the
 *         fragment was not rejected
 * @throws IOException declared by the interface; raised e.g. while reading the credentials
 */
@Override
public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException {
    LlapTokenInfo tokenInfo = null;
    try {
        tokenInfo = LlapTokenChecker.getTokenInfo(clusterId);
    } catch (SecurityException ex) {
        logSecurityErrorRarely(null);
        throw ex;
    }
    SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo);
    TezEvent initialEvent = extractInitialEvent(request, tokenInfo);
    TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
    String fragmentIdString = attemptId.toString();
    // Guard kept on purpose: stringifySubmitRequest builds a string eagerly.
    if (LOG.isInfoEnabled()) {
        LOG.info("Queueing container for execution: fragemendId={}, {}", fragmentIdString, stringifySubmitRequest(request, vertex));
    }
    QueryIdentifierProto qIdProto = vertex.getQueryIdentifier();
    HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(), request.getContainerIdString(), localAddress.get().getHostName(), constructUniqueQueryId(vertex.getHiveQueryId(), qIdProto.getDagIndex()), qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber());
    // This is the start of container-annotated logging.
    final String dagId = attemptId.getTaskID().getVertexID().getDAGId().toString();
    final String queryId = vertex.getHiveQueryId();
    final String fragId = LlapTezUtils.stripAttemptPrefix(fragmentIdString);
    MDC.put("dagId", dagId);
    MDC.put("queryId", queryId);
    MDC.put("fragmentId", fragId);
    // TODO: Ideally we want tez to use CallableWithMdc that retains the MDC for threads created in
    // thread pool. For now, we will push both dagId and queryId into NDC and the custom thread
    // pool that we use for task execution and llap io (StatsRecordingThreadPool) will pop them
    // using reflection and update the MDC.
    NDC.push(dagId);
    NDC.push(queryId);
    NDC.push(fragId);
    Scheduler.SubmissionState submissionState;
    SubmitWorkResponseProto.Builder responseBuilder = SubmitWorkResponseProto.newBuilder();
    try {
        Map<String, String> env = new HashMap<>();
        // TODO What else is required in this environment map.
        env.putAll(localEnv);
        env.put(ApplicationConstants.Environment.USER.name(), vertex.getUser());
        // Read the DAG id straight from the attempt ID built above instead of re-parsing its
        // string form (assumes fromString(toString()) round-trips to an equal ID).
        int dagIdentifier = attemptId.getTaskID().getVertexID().getDAGId().getId();
        QueryIdentifier queryIdentifier = new QueryIdentifier(qIdProto.getApplicationIdString(), dagIdentifier);
        // Deserialize the credentials shipped with the request to obtain the job token.
        Credentials credentials = new Credentials();
        DataInputBuffer dib = new DataInputBuffer();
        byte[] tokenBytes = request.getCredentialsBinary().toByteArray();
        dib.reset(tokenBytes, tokenBytes.length);
        credentials.readTokenStorageStream(dib);
        Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
        LlapNodeId amNodeId = LlapNodeId.getInstance(request.getAmHost(), request.getAmPort());
        QueryFragmentInfo fragmentInfo = queryTracker.registerFragment(queryIdentifier, qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId);
        String[] localDirs = fragmentInfo.getLocalDirs();
        Preconditions.checkNotNull(localDirs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Dirs are: " + Arrays.toString(localDirs));
        }
        // May need to setup localDir for re-localization, which is usually setup as Environment.PWD.
        // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
        Configuration callableConf = new Configuration(getConfig());
        UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
        TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf, new ExecutionContextImpl(localAddress.get().getHostName()), env, credentials, memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler, this, tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi, completionListener, socketFactory);
        submissionState = executorService.schedule(callable);
        if (LOG.isInfoEnabled()) {
            LOG.info("SubmissionState for {} : {} ", fragmentIdString, submissionState);
        }
        if (submissionState.equals(Scheduler.SubmissionState.REJECTED)) {
            // Stop tracking the fragment and report the rejection to the caller. Note that the
            // rejected response does not carry the unique node id.
            fragmentComplete(fragmentInfo);
            return responseBuilder.setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
        }
        if (metrics != null) {
            metrics.incrExecutorTotalRequestsHandled();
        }
    } finally {
        // Always drop the logging context installed above, on success, rejection or failure.
        MDC.clear();
        NDC.clear();
    }
    return responseBuilder.setUniqueNodeId(daemonId.getUniqueNodeIdInCluster()).setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
}
Also used : LlapTokenInfo(org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashMap(java.util.HashMap) ByteString(com.google.protobuf.ByteString) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) NotTezEvent(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Credentials(org.apache.hadoop.security.Credentials) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 13 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskUmbilicalExternalClient method submitWork.

/**
 * Submit the work for actual execution.
 *
 * <p>Registers an (initially empty) pending-event entry and heartbeat info for the fragment,
 * schedules a heartbeat timeout check, and sends the request to the given LLAP daemon. The
 * outcome is reported asynchronously through the callback; failures and rejections are
 * forwarded to {@code responder} when one is set.
 *
 * @param request the work request; its work spec carries either a vertex or a vertex binary
 * @param llapHost host of the LLAP daemon to submit to
 * @param llapPort port of the LLAP daemon to submit to
 * @throws RuntimeException wrapping an {@code InvalidProtocolBufferException} if the vertex
 *         binary in the request cannot be parsed
 */
public void submitWork(SubmitWorkRequestProto request, String llapHost, int llapPort) {
    // Register the pending events to be sent for this spec.
    VertexOrBinary vob = request.getWorkSpec();
    assert vob.hasVertexBinary() != vob.hasVertex();
    SignableVertexSpec vertex = null;
    try {
        vertex = vob.hasVertex() ? vob.getVertex() : SignableVertexSpec.parseFrom(vob.getVertexBinary());
    } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException(e);
    }
    QueryIdentifierProto queryIdentifierProto = vertex.getQueryIdentifier();
    TezTaskAttemptID attemptId = Converters.createTaskAttemptId(queryIdentifierProto, vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
    final String fragmentId = attemptId.toString();
    final TaskHeartbeatInfo thi = new TaskHeartbeatInfo(fragmentId, llapHost, llapPort);
    pendingEvents.putIfAbsent(fragmentId, new PendingEventData(thi, Lists.<TezEvent>newArrayList()));
    // Setup timer task to check for heartbeat timeouts
    // NOTE(review): a new HeartbeatCheckTask is scheduled on every call - confirm this is intended.
    timer.scheduleAtFixedRate(new HeartbeatCheckTask(), connectionTimeout, connectionTimeout, TimeUnit.MILLISECONDS);
    // Send out the actual SubmitWorkRequest
    communicator.sendSubmitWork(request, llapHost, llapPort, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                if (response.getSubmissionState().equals(SubmissionStateProto.REJECTED)) {
                    String msg = "Fragment: " + fragmentId + " rejected. Server Busy.";
                    LOG.info(msg);
                    if (responder != null) {
                        Throwable err = new RuntimeException(msg);
                        responder.submissionFailed(fragmentId, err);
                    }
                    return;
                }
            }
            // Remember which daemon instance accepted the fragment.
            if (response.hasUniqueNodeId()) {
                thi.uniqueNodeId = response.getUniqueNodeId();
            }
        }

        @Override
        public void indicateError(Throwable t) {
            String msg = "Failed to submit: " + fragmentId;
            LOG.error(msg, t);
            // Same null guard as in setResponse: the responder is optional.
            if (responder != null) {
                Throwable err = new RuntimeException(msg, t);
                responder.submissionFailed(fragmentId, err);
            }
        }
    });
}
Also used : InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) VertexOrBinary(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.VertexOrBinary) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 14 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskUmbilicalExternalClient method updateHeartbeatInfo.

/**
 * Processes a node heartbeat: refreshes the last-heartbeat timestamp of every tracked task
 * (pending or registered) that is recorded against the reporting node and is present in the
 * reported attempt list.
 *
 * <p>Tasks recorded against the node but absent from the report are logged, as is the case
 * where no task at all was refreshed.
 *
 * @param hostname hostname of the reporting node
 * @param uniqueId unique id of the reporting node
 * @param port port of the reporting node
 * @param tasks task attempts the node reports; each element is cast to TezTaskAttemptID
 */
private void updateHeartbeatInfo(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
    HashSet<TezTaskAttemptID> attempts = new HashSet<>();
    for (Writable w : tasks.get()) {
        attempts.add((TezTaskAttemptID) w);
    }
    int updateCount = 0;
    // Collects "<attemptId>, " for every task expected on the node but not reported by it.
    StringBuilder missing = new StringBuilder();
    for (PendingEventData pendingEventData : pendingEvents.values()) {
        if (pendingEventData != null && refreshHeartbeatIfReported(pendingEventData.heartbeatInfo, hostname, uniqueId, port, attempts, missing)) {
            updateCount++;
        }
    }
    for (TaskHeartbeatInfo thi : registeredTasks.values()) {
        if (thi != null && refreshHeartbeatIfReported(thi, hostname, uniqueId, port, attempts, missing)) {
            updateCount++;
        }
    }
    if (missing.length() > 0) {
        LOG.info("The tasks we expected to be on the node are not there: " + missing);
    }
    if (updateCount == 0) {
        LOG.info("No tasks found for heartbeat from hostname " + hostname + ", port " + port);
    }
}

/**
 * Refreshes one task's heartbeat if the task is recorded against the given node and the node
 * reported it; appends the task's attempt id to {@code missing} if the node matches but did
 * not report the task.
 *
 * @return true if the heartbeat timestamp was refreshed
 */
private boolean refreshHeartbeatIfReported(TaskHeartbeatInfo thi, String hostname, String uniqueId, int port, HashSet<TezTaskAttemptID> attempts, StringBuilder missing) {
    String thiUniqueId = thi.uniqueNodeId;
    boolean sameNode = thi.hostname.equals(hostname) && thi.port == port && thiUniqueId != null && thiUniqueId.equals(uniqueId);
    if (!sameNode) {
        return false;
    }
    TezTaskAttemptID ta = TezTaskAttemptID.fromString(thi.taskAttemptId);
    if (attempts.contains(ta)) {
        thi.lastHeartbeat.set(System.currentTimeMillis());
        return true;
    }
    missing.append(thi.taskAttemptId).append(", ");
    return false;
}
Also used : Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) HashSet(java.util.HashSet) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 15 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TaskRunnerCallable method callInternal.

/**
 * Runs the fragment on the calling thread's behalf and returns the task runner's result.
 *
 * <p>Steps visible here: install the MDC via {@code setMDCFromNDC()}, mark the callable as
 * started, unregister the task from the AM reporter, bail out with
 * {@code EndReason.KILL_REQUESTED} if the task was killed before starting, create the
 * umbilical proxy as the query's umbilical UGI, and finally run a {@code TezTaskRunner2}.
 * The MDC, the per-thread IO attempt id and the file systems opened for the task UGI are
 * cleaned up in {@code finally} blocks.
 *
 * @return the result of the task run, or a KILL_REQUESTED result if the task never started
 * @throws Exception propagated from setup or from the task run
 */
@Override
protected TaskRunner2Result callInternal() throws Exception {
    setMDCFromNDC();
    try {
        isStarted.set(true);
        this.startTime = System.currentTimeMillis();
        this.threadName = Thread.currentThread().getName();
        final TezTaskAttemptID ta = taskSpec.getTaskAttemptID();
        if (LOG.isDebugEnabled()) {
            LOG.debug("canFinish: " + ta + ": " + canFinish());
        }
        // Unregister from the AMReporter, since the task is now running.
        this.amReporter.unregisterTask(request.getAmHost(), request.getAmPort(), fragmentInfo.getQueryInfo().getQueryIdentifier(), ta);
        synchronized (this) {
            if (!shouldRunTask) {
                LOG.info("Not starting task {} since it was killed earlier", ta);
                return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, null, false);
            }
        }
        // TODO This executor seems unnecessary. Here and TezChild
        executor = new StatsRecordingThreadPool(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("TezTR-" + threadNameSuffix).build());
        // TODO Consolidate this code with TezChild.
        runtimeWatch.start();
        if (fsTaskUgi == null) {
            fsTaskUgi = UserGroupInformation.createRemoteUser(vertex.getUser());
        }
        fsTaskUgi.addCredentials(credentials);
        Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>();
        serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID, TezCommonUtils.convertJobTokenToBytes(jobToken));
        Multimap<String, String> startedInputsMap = createStartedInputMap(vertex);
        final UserGroupInformation taskOwner = fragmentInfo.getQueryInfo().getUmbilicalUgi();
        if (LOG.isDebugEnabled()) {
            LOG.debug("taskOwner hashCode:" + taskOwner.hashCode());
        }
        final InetSocketAddress address = NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
        // The umbilical proxy is created while running as the task owner.
        umbilical = taskOwner.doAs(new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() {

            @Override
            public LlapTaskUmbilicalProtocol run() throws Exception {
                return RPC.getProxy(LlapTaskUmbilicalProtocol.class, LlapTaskUmbilicalProtocol.versionID, address, taskOwner, conf, socketFactory);
            }
        });
        String fragmentId = LlapTezUtils.stripAttemptPrefix(ta.toString());
        taskReporter = new LlapTaskReporter(completionListener, umbilical, confParams.amHeartbeatIntervalMsMax, confParams.amCounterHeartbeatInterval, confParams.amMaxEventsPerHeartbeat, new AtomicLong(0), request.getContainerIdString(), fragmentId, initialEvent, requestId);
        String attemptId = fragmentInfo.getFragmentIdentifierString();
        IOContextMap.setThreadAttemptId(attemptId);
        try {
            // Re-check under the lock: the task may have been killed during the setup above.
            synchronized (this) {
                if (shouldRunTask) {
                    taskRunner = new TezTaskRunner2(conf, fsTaskUgi, fragmentInfo.getLocalDirs(), taskSpec, vertex.getQueryIdentifier().getAppAttemptNumber(), serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor, objectRegistry, pid, executionContext, memoryAvailable, false, tezHadoopShim);
                }
            }
            if (taskRunner == null) {
                LOG.info("Not starting task {} since it was killed earlier", ta);
                return new TaskRunner2Result(EndReason.KILL_REQUESTED, null, null, false);
            }
            try {
                TaskRunner2Result result = taskRunner.run();
                if (result.isContainerShutdownRequested()) {
                    LOG.warn("Unexpected container shutdown requested while running task. Ignoring");
                }
                isCompleted.set(true);
                return result;
            } finally {
                FileSystem.closeAllForUGI(fsTaskUgi);
                LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" + runtimeWatch.stop().elapsedMillis());
                if (LOG.isDebugEnabled()) {
                    LOG.debug("canFinish post completion: " + ta + ": " + canFinish());
                }
            }
        } finally {
            IOContextMap.clearThreadAttempt(attemptId);
        }
    } finally {
        MDC.clear();
    }
}
Also used : HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) TaskRunner2Result(org.apache.tez.runtime.task.TaskRunner2Result) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) ByteBuffer(java.nio.ByteBuffer) AtomicLong(java.util.concurrent.atomic.AtomicLong) TezTaskRunner2(org.apache.tez.runtime.task.TezTaskRunner2) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Aggregations

TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)15 SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec)5 QueryIdentifierProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto)4 TezDAGID (org.apache.tez.dag.records.TezDAGID)4 TezTaskID (org.apache.tez.dag.records.TezTaskID)4 TezVertexID (org.apache.tez.dag.records.TezVertexID)4 InputSpec (org.apache.tez.runtime.api.impl.InputSpec)4 OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec)4 TaskSpec (org.apache.tez.runtime.api.impl.TaskSpec)4 ByteBuffer (java.nio.ByteBuffer)3 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 Test (org.junit.Test)3 ByteString (com.google.protobuf.ByteString)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId)2 SubmitWorkResponseProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto)2 Writable (org.apache.hadoop.io.Writable)2 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)2