Search in sources :

Example 76 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

Class TestRecovery, method testTwoRoundsRecoverying.

/**
 * Runs the multiple-round recovery scenario for randomly sampled pairs of shutdown conditions.
 *
 * <p>Twelve POST-timing shutdown conditions are built, one per history event. For every index
 * except the last, a coin flip ({@code nextDouble() < 0.5}) decides whether that index is
 * considered at all. If it is, a later index is drawn at random, and the scenario helper is
 * invoked only when the drawn index is the final condition in the list.
 *
 * @throws Exception declared so that anything raised while building the fixtures or running the
 *     scenario helper propagates to the test runner
 */
@Test(timeout = 1800000)
public void testTwoRoundsRecoverying() throws Exception {
    // Identifiers shared by all of the history events below.
    ApplicationId applicationId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    TezDAGID dagId = TezDAGID.getInstance(applicationId, 1);
    TezVertexID vertexId0 = TezVertexID.getInstance(dagId, 0);
    TezVertexID vertexId1 = TezVertexID.getInstance(dagId, 1);
    TezVertexID vertexId2 = TezVertexID.getInstance(dagId, 2);
    ContainerId containerId = ContainerId.newInstance(ApplicationAttemptId.newInstance(applicationId, 1), 1);
    NodeId nodeId = NodeId.newInstance("localhost", 10);
    List<TezEvent> initEvents = Lists.newArrayList(
        new TezEvent(InputDataInformationEvent.createWithObjectPayload(0, new Object()), null));

    // One shutdown condition per history event, in the order the events are listed here.
    List<SimpleShutdownCondition> conditions = Lists.newArrayList(
        new SimpleShutdownCondition(TIMING.POST,
            new DAGInitializedEvent(dagId, 0L, "username", "dagName", null)),
        new SimpleShutdownCondition(TIMING.POST,
            new DAGStartedEvent(dagId, 0L, "username", "dagName")),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexInitializedEvent(vertexId0, "Tokenizer", 0L, 0L, 0, "", null, initEvents, null)),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexStartedEvent(vertexId0, 0L, 0L)),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexConfigurationDoneEvent(vertexId0, 0L, 2, null, null, null, true)),
        new SimpleShutdownCondition(TIMING.POST,
            new TaskStartedEvent(TezTaskID.getInstance(vertexId0, 0), "vertexName", 0L, 0L)),
        new SimpleShutdownCondition(TIMING.POST,
            new TaskAttemptStartedEvent(
                TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId0, 0), 0),
                "vertexName", 0L, containerId, nodeId, "", "", "")),
        new SimpleShutdownCondition(TIMING.POST,
            new TaskFinishedEvent(TezTaskID.getInstance(vertexId0, 0), "vertexName", 0L, 0L, null,
                TaskState.SUCCEEDED, "", new TezCounters(), 0)),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexFinishedEvent(vertexId0, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
                VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
                new HashMap<String, Integer>(), null)),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexFinishedEvent(vertexId1, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
                VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
                new HashMap<String, Integer>(), null)),
        new SimpleShutdownCondition(TIMING.POST,
            new VertexFinishedEvent(vertexId2, "vertexName", 1, 0L, 0L, 0L, 0L, 0L,
                VertexState.SUCCEEDED, "", new TezCounters(), new VertexStats(),
                new HashMap<String, Integer>(), null)),
        new SimpleShutdownCondition(TIMING.POST,
            new DAGFinishedEvent(dagId, 0L, 0L, DAGState.SUCCEEDED, "", new TezCounters(), "username",
                "dagName", new HashMap<String, Integer>(),
                ApplicationAttemptId.newInstance(applicationId, 1), null)));

    final int lastIndex = conditions.size() - 1;
    Random random = new Random();
    for (int first = 0; first < lastIndex; first++) {
        // Skip roughly half of the candidate first conditions at random
        // (presumably to keep the run inside the test timeout).
        if (random.nextDouble() >= 0.5) {
            continue;
        }
        // Draw a second condition strictly after the first one.
        int second = first + 1 + random.nextInt(lastIndex - first);
        if (second != lastIndex) {
            // Only pairs whose second condition is the final one are exercised.
            continue;
        }
        RecoveryServiceWithEventHandlingHook.MultipleRoundShutdownCondition twoRounds =
            new RecoveryServiceWithEventHandlingHook.MultipleRoundShutdownCondition(
                Lists.newArrayList(conditions.get(first), conditions.get(second)));
        boolean firstIsVertexStarted =
            conditions.get(first).getHistoryEvent().getEventType() == HistoryEventType.VERTEX_STARTED;
        testOrderedWordCountMultipleRoundRecoverying(twoRounds, true, firstIsVertexStarted);
    }
}
Also used : VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) VertexStats(org.apache.tez.dag.app.dag.impl.VertexStats) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) Random(java.util.Random) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) SimpleShutdownCondition(org.apache.tez.test.RecoveryServiceWithEventHandlingHook.SimpleShutdownCondition) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TezCounters(org.apache.tez.common.counters.TezCounters) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) DAGFinishedEvent(org.apache.tez.dag.history.events.DAGFinishedEvent) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) Test(org.junit.Test)

Example 77 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project hive by apache.

Class NotTezEventHelper, method toTezEvent.

/**
 * Builds a {@link TezEvent} from the contents of a {@code NotTezEvent}.
 *
 * <p>The serialized input-event bytes carried by {@code nte} are parsed as a
 * {@code RootInputDataInformationEventProto} and converted to an {@link InputDataInformationEvent}.
 * The resulting event is stamped with the current wall-clock time, given INPUT-typed source
 * metadata, and then assigned INPUT-typed destination metadata that carries the destination
 * input name from {@code nte}.
 *
 * @param nte the message supplying the vertex name, destination input name and event bytes
 * @return the newly created event with its destination info set
 * @throws InvalidProtocolBufferException if the event bytes cannot be parsed
 */
public static TezEvent toTezEvent(NotTezEvent nte) throws InvalidProtocolBufferException {
    // Source and destination share the vertex name; they differ only in the third argument.
    EventMetaData source = new EventMetaData(
        EventMetaData.EventProducerConsumerType.INPUT, nte.getVertexName(), "NULL_VERTEX", null);
    EventMetaData destination = new EventMetaData(
        EventMetaData.EventProducerConsumerType.INPUT, nte.getVertexName(), nte.getDestInputName(), null);

    RootInputDataInformationEventProto proto =
        RootInputDataInformationEventProto.parseFrom(nte.getInputEventProtoBytes());
    InputDataInformationEvent payload =
        ProtoConverters.convertRootInputDataInformationEventFromProto(proto);

    TezEvent result = new TezEvent(payload, source, System.currentTimeMillis());
    result.setDestinationInfo(destination);
    return result;
}
Also used : NotTezEvent(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 78 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project hive by apache.

Class ContainerRunnerImpl, method submitWork.

/**
 * Accepts a work-submission request, registers the fragment and hands it to the executor.
 *
 * <p>Steps, in order: obtain token info for {@code clusterId}; extract the vertex spec and the
 * initial event from the request; derive the task attempt id; verify the JWT for external
 * clients; log the fragment start; populate MDC/NDC with dag, query and fragment ids; register
 * the fragment with {@code queryTracker}; build a {@code TaskRunnerCallable} and schedule it on
 * {@code executorService}. MDC and NDC are cleared in a {@code finally} block regardless of the
 * outcome.
 *
 * @param request the submit-work request
 * @return a response carrying the submission state; the unique node id is set only when the
 *     submission was not {@code REJECTED}
 * @throws IOException declared by the signature; which callee raises it is not visible in this
 *     method
 */
@Override
public SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException {
    LlapTokenInfo tokenInfo = null;
    try {
        tokenInfo = LlapTokenChecker.getTokenInfo(clusterId);
    } catch (SecurityException ex) {
        // Record the security failure, then let the caller see the original exception.
        logSecurityErrorRarely(null);
        throw ex;
    }
    SignableVertexSpec vertex = extractVertexSpec(request, tokenInfo);
    TezEvent initialEvent = extractInitialEvent(request, tokenInfo);
    TezTaskAttemptID attemptId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber());
    String fragmentIdString = attemptId.toString();
    QueryIdentifierProto qIdProto = vertex.getQueryIdentifier();
    verifyJwtForExternalClient(request, qIdProto.getApplicationIdString(), fragmentIdString);
    // NOTE(review): "fragemendId" in the message below looks like a typo for "fragmentId";
    // left as-is because log consumers may match on the existing text.
    LOG.info("Queueing container for execution: fragemendId={}, {}", fragmentIdString, stringifySubmitRequest(request, vertex));
    HistoryLogger.logFragmentStart(qIdProto.getApplicationIdString(), request.getContainerIdString(), localAddress.get().getHostName(), constructUniqueQueryId(vertex.getHiveQueryId(), qIdProto.getDagIndex()), qIdProto.getDagIndex(), vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber());
    // This is the start of container-annotated logging.
    final String dagId = attemptId.getTaskID().getVertexID().getDAGId().toString();
    final String queryId = vertex.getHiveQueryId();
    final String fragmentId = LlapTezUtils.stripAttemptPrefix(fragmentIdString);
    MDC.put("dagId", dagId);
    MDC.put("queryId", queryId);
    MDC.put("fragmentId", fragmentId);
    // TODO: Ideally we want tez to use CallableWithMdc that retains the MDC for threads created in
    // thread pool. For now, we will push both dagId and queryId into NDC and the custom thread
    // pool that we use for task execution and llap io (StatsRecordingThreadPool) will pop them
    // using reflection and update the MDC.
    NDC.push(dagId);
    NDC.push(queryId);
    NDC.push(fragmentId);
    Scheduler.SubmissionState submissionState;
    SubmitWorkResponseProto.Builder responseBuilder = SubmitWorkResponseProto.newBuilder();
    try {
        // Environment for the task: a copy of localEnv plus the submitting user.
        Map<String, String> env = new HashMap<>();
        // TODO What else is required in this environment map.
        env.putAll(localEnv);
        env.put(ApplicationConstants.Environment.USER.name(), vertex.getUser());
        // NOTE(review): fragmentIdString is attemptId.toString(), so this re-parse looks
        // redundant with attemptId — confirm before simplifying.
        TezTaskAttemptID taskAttemptId = TezTaskAttemptID.fromString(fragmentIdString);
        int dagIdentifier = taskAttemptId.getTaskID().getVertexID().getDAGId().getId();
        QueryIdentifier queryIdentifier = new QueryIdentifier(qIdProto.getApplicationIdString(), dagIdentifier);
        Credentials credentials = LlapUtil.credentialsFromByteArray(request.getCredentialsBinary().toByteArray());
        Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);
        LlapNodeId amNodeId = LlapNodeId.getInstance(request.getAmHost(), request.getAmPort());
        // NOTE(review): within this method the fragment is untracked only on the REJECTED path
        // below; an exception thrown after this registration is not followed by any visible
        // cleanup here — confirm it is handled elsewhere.
        QueryFragmentInfo fragmentInfo = queryTracker.registerFragment(queryIdentifier, qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId, ugiPool);
        // May need to setup localDir for re-localization, which is usually setup as Environment.PWD.
        // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
        // Lazy create conf object, as it gets expensive in this codepath.
        Supplier<Configuration> callableConf = () -> new Configuration(getConfig());
        // A filesystem UGI is created only when a factory has been configured.
        UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
        // Treated as guaranteed only if the field is present and true.
        boolean isGuaranteed = request.hasIsGuaranteed() && request.getIsGuaranteed();
        // enable the printing of (per daemon) LLAP task queue/run times via LLAP_TASK_TIME_SUMMARY
        ConfVars tezSummary = ConfVars.TEZ_EXEC_SUMMARY;
        ConfVars llapTasks = ConfVars.LLAP_TASK_TIME_SUMMARY;
        // Both settings must be enabled for task times to be added.
        boolean addTaskTimes = getConfig().getBoolean(tezSummary.varname, tezSummary.defaultBoolVal) && getConfig().getBoolean(llapTasks.varname, llapTasks.defaultBoolVal);
        // The first two branches assign the same value; they are kept separate to document
        // the two distinct reasons for using the host name.
        final String llapHost;
        if (UserGroupInformation.isSecurityEnabled()) {
            // when kerberos is enabled always use FQDN
            llapHost = localAddress.get().getHostName();
        } else if (execUseFQDN) {
            // when FQDN is explicitly requested (default)
            llapHost = localAddress.get().getHostName();
        } else {
            // when FQDN is not requested, use ip address
            llapHost = localAddress.get().getAddress().getHostAddress();
        }
        LOG.info("Using llap host: {} for execution context. hostName: {} hostAddress: {}", llapHost, localAddress.get().getHostName(), localAddress.get().getAddress().getHostAddress());
        // TODO: ideally we'd register TezCounters here, but it seems impossible before registerTask.
        WmFragmentCounters wmCounters = new WmFragmentCounters(addTaskTimes);
        TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf, new ExecutionContextImpl(llapHost), env, credentials, memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler, this, tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi, completionListener, socketFactory, isGuaranteed, wmCounters);
        submissionState = executorService.schedule(callable);
        LOG.info("SubmissionState for {} : {} ", fragmentIdString, submissionState);
        if (submissionState.equals(Scheduler.SubmissionState.REJECTED)) {
            // Stop tracking the fragment and report the rejection to the caller. Nothing is
            // thrown here, and this response does not carry the unique node id.
            fragmentComplete(fragmentInfo);
            return responseBuilder.setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
        }
        // Count the request as handled only when it was not rejected and metrics are available.
        if (metrics != null) {
            metrics.incrExecutorTotalRequestsHandled();
        }
    } finally {
        // Always drop the logging context set up before the try block, on every exit path.
        MDC.clear();
        NDC.clear();
    }
    // Non-rejected submissions additionally report this daemon's unique node id.
    return responseBuilder.setUniqueNodeId(daemonId.getUniqueNodeIdInCluster()).setSubmissionState(SubmissionStateProto.valueOf(submissionState.name())).build();
}
Also used : LlapTokenInfo(org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashMap(java.util.HashMap) ByteString(com.google.protobuf.ByteString) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) WmFragmentCounters(org.apache.hadoop.hive.llap.counters.WmFragmentCounters) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) SignableVertexSpec(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) NotTezEvent(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.NotTezEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Credentials(org.apache.hadoop.security.Credentials) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Aggregations

TezEvent (org.apache.tez.runtime.api.impl.TezEvent): 78; Test (org.junit.Test): 50; EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData): 48; TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 34; VertexEventRouteEvent (org.apache.tez.dag.app.dag.event.VertexEventRouteEvent): 31; StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest): 27; TezTaskID (org.apache.tez.dag.records.TezTaskID): 27; EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest): 19; DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent): 17; GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest): 16; VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest): 16; ArrayList (java.util.ArrayList): 13; TezVertexID (org.apache.tez.dag.records.TezVertexID): 12; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 11; Task (org.apache.tez.dag.app.dag.Task): 11; InputReadErrorEvent (org.apache.tez.runtime.api.events.InputReadErrorEvent): 11; VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent): 10; InputInitializerEvent (org.apache.tez.runtime.api.events.InputInitializerEvent): 10; LinkedList (java.util.LinkedList): 9; NodeId (org.apache.hadoop.yarn.api.records.NodeId): 9