Search in sources :

Example 1 with LogAggregationStatus

use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in project hadoop by apache.

the class HadoopArchiveLogs method filterAppsByAggregatedStatus.

/**
 * Removes from {@code eligibleApplications} every application whose log
 * aggregation is not known to have completed, according to the application
 * report returned by a {@link YarnClient}.
 * <p>
 * An application is removed when its reported status is {@code RUNNING},
 * {@code RUNNING_WITH_FAILURE}, {@code NOT_START}, {@code DISABLED} or
 * {@code FAILED}, or when the report carries no status at all
 * ({@code null}). Applications that remain get their finish time updated from
 * the report. Applications the client cannot find
 * ({@link ApplicationNotFoundException}) are kept unchanged, on the assumption
 * that their aggregation has finished.
 *
 * @throws IOException propagated from the YARN client calls.
 * @throws YarnException propagated from the YARN client calls.
 */
@VisibleForTesting
void filterAppsByAggregatedStatus() throws IOException, YarnException {
    YarnClient client = YarnClient.createYarnClient();
    try {
        client.init(getConf());
        client.start();
        for (Iterator<AppInfo> it = eligibleApplications.iterator(); it.hasNext(); ) {
            AppInfo app = it.next();
            try {
                ApplicationReport report = client.getApplicationReport(ApplicationId.fromString(app.getAppId()));
                LogAggregationStatus aggStatus = report.getLogAggregationStatus();
                // The report may not carry an aggregation status. A missing
                // status is treated as "not known to be complete" so that the
                // app is skipped instead of failing the whole pass with a
                // NullPointerException.
                boolean notAggregated = aggStatus == null
                        || aggStatus == LogAggregationStatus.RUNNING
                        || aggStatus == LogAggregationStatus.RUNNING_WITH_FAILURE
                        || aggStatus == LogAggregationStatus.NOT_START
                        || aggStatus == LogAggregationStatus.DISABLED
                        || aggStatus == LogAggregationStatus.FAILED;
                if (notAggregated) {
                    if (verbose) {
                        LOG.info("Skipping " + app.getAppId() + " due to aggregation status being " + aggStatus);
                    }
                    // Iterator.remove() is the safe way to drop the current
                    // element while iterating.
                    it.remove();
                } else {
                    if (verbose) {
                        LOG.info(app.getAppId() + " has aggregation status " + aggStatus);
                    }
                    app.setFinishTime(report.getFinishTime());
                }
            } catch (ApplicationNotFoundException e) {
                // Assume the aggregation has finished
                if (verbose) {
                    LOG.info(app.getAppId() + " not in the ResourceManager");
                }
            }
        }
    } finally {
        if (client != null) {
            client.stop();
        }
    }
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with LogAggregationStatus

use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in project hadoop by apache.

the class AppBlock method generateOverviewTable.

/**
   * Generate overview table for app web page.
   * @param app app info.
   * @param schedulerPath schedule path.
   * @param webUiType web ui type.
   * @param appReport app report.
   */
private void generateOverviewTable(AppInfo app, String schedulerPath, String webUiType, ApplicationReport appReport) {
    ResponseInfo overviewTable = info("Application Overview")._("User:", schedulerPath, app.getUser())._("Name:", app.getName())._("Application Type:", app.getType())._("Application Tags:", app.getApplicationTags() == null ? "" : app.getApplicationTags())._("Application Priority:", clarifyAppPriority(app.getPriority()))._("YarnApplicationState:", app.getAppState() == null ? UNAVAILABLE : clarifyAppState(app.getAppState()))._("Queue:", schedulerPath, app.getQueue())._("FinalStatus Reported by AM:", clairfyAppFinalStatus(app.getFinalAppStatus()))._("Started:", Times.format(app.getStartedTime()))._("Elapsed:", StringUtils.formatTime(Times.elapsed(app.getStartedTime(), app.getFinishedTime())))._("Tracking URL:", app.getTrackingUrl() == null || app.getTrackingUrl().equals(UNAVAILABLE) ? null : root_url(app.getTrackingUrl()), app.getTrackingUrl() == null || app.getTrackingUrl().equals(UNAVAILABLE) ? "Unassigned" : app.getAppState() == YarnApplicationState.FINISHED || app.getAppState() == YarnApplicationState.FAILED || app.getAppState() == YarnApplicationState.KILLED ? "History" : "ApplicationMaster");
    if (webUiType != null && webUiType.equals(YarnWebParams.RM_WEB_UI)) {
        LogAggregationStatus status = getLogAggregationStatus();
        if (status == null) {
            overviewTable._("Log Aggregation Status:", "N/A");
        } else if (status == LogAggregationStatus.DISABLED || status == LogAggregationStatus.NOT_START || status == LogAggregationStatus.SUCCEEDED) {
            overviewTable._("Log Aggregation Status:", status.name());
        } else {
            overviewTable._("Log Aggregation Status:", root_url("logaggregationstatus", app.getAppId()), status.name());
        }
        long timeout = appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME).getRemainingTime();
        if (timeout < 0) {
            overviewTable._("Application Timeout (Remaining Time):", "Unlimited");
        } else {
            overviewTable._("Application Timeout (Remaining Time):", String.format("%d seconds", timeout));
        }
    }
    overviewTable._("Diagnostics:", app.getDiagnosticsInfo() == null ? "" : app.getDiagnosticsInfo());
    overviewTable._("Unmanaged Application:", app.isUnmanagedApp());
    overviewTable._("Application Node Label expression:", app.getAppNodeLabelExpression() == null ? "<Not set>" : app.getAppNodeLabelExpression());
    overviewTable._("AM container Node Label expression:", app.getAmNodeLabelExpression() == null ? "<Not set>" : app.getAmNodeLabelExpression());
}
Also used : ResponseInfo(org.apache.hadoop.yarn.webapp.ResponseInfo) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus)

Example 3 with LogAggregationStatus

use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in project hadoop by apache.

the class RMAppImpl method createAndGetApplicationReport.

/**
 * Builds an {@link ApplicationReport} for this application while holding the
 * read lock for the whole method.
 * <p>
 * When {@code allowAccess} is false, the tracking URLs, host and diagnostics
 * stay at {@code UNAVAILABLE}, the RPC port stays at -1, both tokens and the
 * log aggregation status stay {@code null}, and the usage report is
 * {@code RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT}. The report
 * always carries exactly one application timeout entry (LIFETIME).
 *
 * @param clientUserName user the client-to-AM token is created for; also
 *          compared with {@code getUser()} before the AMRM token is included.
 * @param allowAccess whether attempt details, diagnostics, tokens and usage
 *          metrics may be put into the report.
 * @return the newly built application report.
 */
@Override
public ApplicationReport createAndGetApplicationReport(String clientUserName, boolean allowAccess) {
    this.readLock.lock();
    try {
        // Placeholder values; they are only overwritten inside the
        // allowAccess branch below.
        ApplicationAttemptId currentApplicationAttemptId = null;
        org.apache.hadoop.yarn.api.records.Token clientToAMToken = null;
        String trackingUrl = UNAVAILABLE;
        String host = UNAVAILABLE;
        String origTrackingUrl = UNAVAILABLE;
        LogAggregationStatus logAggregationStatus = null;
        int rpcPort = -1;
        ApplicationResourceUsageReport appUsageReport = RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT;
        FinalApplicationStatus finishState = getFinalApplicationStatus();
        String diags = UNAVAILABLE;
        float progress = 0.0f;
        org.apache.hadoop.yarn.api.records.Token amrmToken = null;
        if (allowAccess) {
            // Default tracking URL; replaced by the attempt's URL when a
            // current attempt exists.
            trackingUrl = getDefaultProxyTrackingUrl();
            if (this.currentAttempt != null) {
                currentApplicationAttemptId = this.currentAttempt.getAppAttemptId();
                trackingUrl = this.currentAttempt.getTrackingUrl();
                origTrackingUrl = this.currentAttempt.getOriginalTrackingUrl();
                if (UserGroupInformation.isSecurityEnabled()) {
                    // get a token so the client can communicate with the app attempt
                    // NOTE: token may be unavailable if the attempt is not running
                    Token<ClientToAMTokenIdentifier> attemptClientToAMToken = this.currentAttempt.createClientToken(clientUserName);
                    if (attemptClientToAMToken != null) {
                        clientToAMToken = BuilderUtils.newClientToAMToken(attemptClientToAMToken.getIdentifier(), attemptClientToAMToken.getKind().toString(), attemptClientToAMToken.getPassword(), attemptClientToAMToken.getService().toString());
                    }
                }
                host = this.currentAttempt.getHost();
                rpcPort = this.currentAttempt.getRpcPort();
                appUsageReport = currentAttempt.getApplicationResourceUsageReport();
                progress = currentAttempt.getProgress();
                // Only set when a current attempt exists; otherwise the
                // report's log aggregation status stays null.
                logAggregationStatus = this.getLogAggregationStatusForAppReport();
            }
            //if the diagnostics is not already set get it from attempt
            diags = getDiagnostics().toString();
            // The AMRM token is only included for an unmanaged AM whose
            // attempt is in the LAUNCHED state, and only when the requesting
            // user is the application's own user.
            if (currentAttempt != null && currentAttempt.getAppAttemptState() == RMAppAttemptState.LAUNCHED) {
                if (getApplicationSubmissionContext().getUnmanagedAM() && clientUserName != null && getUser().equals(clientUserName)) {
                    Token<AMRMTokenIdentifier> token = currentAttempt.getAMRMToken();
                    if (token != null) {
                        amrmToken = BuilderUtils.newAMRMToken(token.getIdentifier(), token.getKind().toString(), token.getPassword(), token.getService().toString());
                    }
                }
            }
            // NOTE(review): when currentAttempt is null, appUsageReport still
            // refers to RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT,
            // so these setters are applied to that object. If it is a shared
            // instance this mutates it for every user -- confirm.
            RMAppMetrics rmAppMetrics = getRMAppMetrics();
            appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds());
            appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds());
            appUsageReport.setPreemptedMemorySeconds(rmAppMetrics.getPreemptedMemorySeconds());
            appUsageReport.setPreemptedVcoreSeconds(rmAppMetrics.getPreemptedVcoreSeconds());
        }
        // No attempt id was taken from a current attempt (access denied or no
        // attempt yet): use a dummy attempt id so the report still has one.
        if (currentApplicationAttemptId == null) {
            currentApplicationAttemptId = BuilderUtils.newApplicationAttemptId(this.applicationId, DUMMY_APPLICATION_ATTEMPT_NUMBER);
        }
        ApplicationReport report = BuilderUtils.newApplicationReport(this.applicationId, currentApplicationAttemptId, this.user, this.queue, this.name, host, rpcPort, clientToAMToken, createApplicationState(), diags, trackingUrl, this.startTime, this.finishTime, finishState, appUsageReport, origTrackingUrl, progress, this.applicationType, amrmToken, applicationTags, this.getApplicationPriority());
        report.setLogAggregationStatus(logAggregationStatus);
        report.setUnmanagedApp(submissionContext.getUnmanagedAM());
        report.setAppNodeLabelExpression(getAppNodeLabelExpression());
        report.setAmNodeLabelExpression(getAmNodeLabelExpression());
        // Default timeout entry, used as-is when no timeouts are configured.
        ApplicationTimeout timeout = ApplicationTimeout.newInstance(ApplicationTimeoutType.LIFETIME, UNLIMITED, UNKNOWN);
        // Only the LIFETIME timeout type is handled here. When more timeout types
        // are supported in YARN-5692, the below logic need to be changed.
        if (!this.applicationTimeouts.isEmpty()) {
            long timeoutInMillis = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME).longValue();
            timeout.setExpiryTime(Times.formatISO8601(timeoutInMillis));
            if (isAppInCompletedStates()) {
                // if application configured with timeout and finished before timeout
                // happens then remaining time should not be calculated.
                timeout.setRemainingTime(0);
            } else {
                // Remaining time in seconds, clamped so it is never negative.
                timeout.setRemainingTime(Math.max((timeoutInMillis - systemClock.getTime()) / 1000, 0));
            }
        }
        report.setApplicationTimeouts(Collections.singletonMap(timeout.getTimeoutType(), timeout));
        return report;
    } finally {
        this.readLock.unlock();
    }
}
Also used : FinalApplicationStatus(org.apache.hadoop.yarn.api.records.FinalApplicationStatus) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) ClientToAMTokenIdentifier(org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ApplicationResourceUsageReport(org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport) ApplicationTimeout(org.apache.hadoop.yarn.api.records.ApplicationTimeout) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus)

Example 4 with LogAggregationStatus

use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in project hadoop by apache.

the class RMAppLogAggregationStatusBlock method render.

/**
 * Renders the log aggregation status page for the application named by the
 * {@code APPLICATION_ID} request parameter.
 * <p>
 * The page consists of a static table describing each
 * {@link LogAggregationStatus} value, followed by a heading with the
 * application's overall status and a table with one row per node report.
 * Values that are not available are rendered as "N/A". If the application id
 * is missing or cannot be parsed, only an error message is written.
 *
 * @param html the block to render into.
 */
@Override
protected void render(Block html) {
    String aid = $(APPLICATION_ID);
    if (aid.isEmpty()) {
        puts("Bad request: requires Application ID");
        return;
    }
    ApplicationId appId;
    try {
        appId = Apps.toAppID(aid);
    } catch (Exception e) {
        puts("Invalid Application ID: " + aid);
        return;
    }
    setTitle(join("Application ", aid));
    // Add LogAggregationStatus description table
    // to explain the meaning of different LogAggregationStatus
    DIV<Hamlet> div_description = html.div(_INFO_WRAP);
    TABLE<DIV<Hamlet>> table_description = div_description.table("#LogAggregationStatusDecription");
    table_description.tr().th(_TH, "Log Aggregation Status").th(_TH, "Description")._();
    table_description.tr().td(LogAggregationStatus.DISABLED.name()).td("Log Aggregation is Disabled.")._();
    table_description.tr().td(LogAggregationStatus.NOT_START.name()).td("Log Aggregation does not Start.")._();
    table_description.tr().td(LogAggregationStatus.RUNNING.name()).td("Log Aggregation is Running.")._();
    table_description.tr().td(LogAggregationStatus.RUNNING_WITH_FAILURE.name()).td("Log Aggregation is Running, but has failures " + "in previous cycles")._();
    table_description.tr().td(LogAggregationStatus.SUCCEEDED.name()).td("Log Aggregation is Succeeded. All of the logs have been " + "aggregated successfully.")._();
    table_description.tr().td(LogAggregationStatus.FAILED.name()).td("Log Aggregation is Failed. At least one of the logs " + "have not been aggregated.")._();
    table_description.tr().td(LogAggregationStatus.TIME_OUT.name()).td("The application is finished, but the log aggregation status is " + "not updated for a long time. Not sure whether the log aggregation " + "is finished or not.")._();
    table_description._();
    div_description._();
    RMApp rmApp = rm.getRMContext().getRMApps().get(appId);
    // Application Log aggregation status Table
    DIV<Hamlet> div = html.div(_INFO_WRAP);
    // Read the overall status exactly once: checking it for null and then
    // asking for it again could observe two different values.
    LogAggregationStatus appStatus = rmApp == null ? null : rmApp.getLogAggregationStatusForAppReport();
    TABLE<DIV<Hamlet>> table = div.h3("Log Aggregation: " + (appStatus == null ? "N/A" : appStatus.name())).table("#LogAggregationStatus");
    int maxLogAggregationDiagnosticsInMemory = conf.getInt(YarnConfiguration.RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY, YarnConfiguration.DEFAULT_RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY);
    table.tr().th(_TH, "NodeId").th(_TH, "Log Aggregation Status").th(_TH, "Last " + maxLogAggregationDiagnosticsInMemory + " Diagnostic Messages").th(_TH, "Last " + maxLogAggregationDiagnosticsInMemory + " Failure Messages")._();
    if (rmApp != null) {
        Map<NodeId, LogAggregationReport> logAggregationReports = rmApp.getLogAggregationReportsForApp();
        if (logAggregationReports != null && !logAggregationReports.isEmpty()) {
            for (Entry<NodeId, LogAggregationReport> entry : logAggregationReports.entrySet()) {
                NodeId nodeId = entry.getKey();
                LogAggregationReport nodeReport = entry.getValue();
                LogAggregationStatus status = null;
                String message = null;
                String failureMessage = null;
                if (nodeReport != null) {
                    status = nodeReport.getLogAggregationStatus();
                    message = nodeReport.getDiagnosticMessage();
                    // Failure messages are only reachable through RMAppImpl;
                    // for any other RMApp implementation the cell shows "N/A"
                    // instead of failing with a ClassCastException.
                    if (rmApp instanceof RMAppImpl) {
                        failureMessage = ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId);
                    }
                }
                table.tr().td(nodeId.toString()).td(status == null ? "N/A" : status.toString()).td(message == null ? "N/A" : message).td(failureMessage == null ? "N/A" : failureMessage)._();
            }
        }
    }
    table._();
    div._();
}
Also used : Hamlet(org.apache.hadoop.yarn.webapp.hamlet.Hamlet) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) DIV(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV) LogAggregationReport(org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 5 with LogAggregationStatus

use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in project hadoop by apache.

the class AppLogAggregatorImpl method uploadLogsForContainers.

/**
 * Runs one log upload cycle for this application.
 * <p>
 * Does nothing when log aggregation is disabled. Otherwise it collects the
 * containers to upload in this cycle, writes their logs through a
 * {@code LogWriter}, schedules the uploaded local files for deletion, and
 * then renames the temporary remote file to its final name (or deletes it
 * when nothing was uploaded).
 * <p>
 * Once the try block has been entered, a log aggregation report is always
 * sent from the finally block, including when the cycle returns early because
 * there are no containers to upload or the writer cannot be created.
 *
 * @param appFinished whether the application has finished; when true an
 *          extra final SUCCEEDED/FAILED report is sent after the per-cycle
 *          report.
 */
private void uploadLogsForContainers(boolean appFinished) {
    if (this.logAggregationDisabled) {
        return;
    }
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials systemCredentials = context.getSystemCredentialsForApps().get(appId);
        if (systemCredentials != null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding new framework-token for " + appId + " for log-aggregation: " + systemCredentials.getAllTokens() + "; userUgi=" + userUgi);
            }
            // this will replace old token
            userUgi.addCredentials(systemCredentials);
        }
    }
    // Create a set of Containers whose logs will be uploaded in this cycle.
    // It includes:
    // a) all containers in pendingContainers: those containers are finished
    //    and satisfy the ContainerLogAggregationPolicy.
    // b) some set of running containers: For all the Running containers,
    //    we use exitCode of 0 to find those which satisfy the
    //    ContainerLogAggregationPolicy.
    Set<ContainerId> pendingContainerInThisCycle = new HashSet<ContainerId>();
    this.pendingContainers.drainTo(pendingContainerInThisCycle);
    // Snapshot taken before the application's other containers are added
    // below, so it holds only the containers drained from pendingContainers.
    Set<ContainerId> finishedContainers = new HashSet<ContainerId>(pendingContainerInThisCycle);
    if (this.context.getApplications().get(this.appId) != null) {
        for (Container container : this.context.getApplications().get(this.appId).getContainers().values()) {
            ContainerType containerType = container.getContainerTokenIdentifier().getContainerType();
            if (shouldUploadLogs(new ContainerLogContext(container.getContainerId(), containerType, 0))) {
                pendingContainerInThisCycle.add(container.getContainerId());
            }
        }
    }
    LogWriter writer = null;
    String diagnosticMessage = "";
    boolean logAggregationSucceedInThisCycle = true;
    try {
        // Nothing to upload; the finally block still sends a report.
        if (pendingContainerInThisCycle.isEmpty()) {
            return;
        }
        logAggregationTimes++;
        try {
            writer = createLogWriter();
            // Write ACLs once when the writer is created.
            writer.writeApplicationACLs(appAcls);
            writer.writeApplicationOwner(this.userUgi.getShortUserName());
        } catch (IOException e1) {
            logAggregationSucceedInThisCycle = false;
            LOG.error("Cannot create writer for app " + this.applicationId + ". Skip log upload this time. ", e1);
            return;
        }
        boolean uploadedLogsInThisCycle = false;
        for (ContainerId container : pendingContainerInThisCycle) {
            // Reuse the aggregator already registered for this container, or
            // create and register a new one.
            ContainerLogAggregator aggregator = null;
            if (containerLogAggregators.containsKey(container)) {
                aggregator = containerLogAggregators.get(container);
            } else {
                aggregator = new ContainerLogAggregator(container);
                containerLogAggregators.put(container, aggregator);
            }
            Set<Path> uploadedFilePathsInThisCycle = aggregator.doContainerLogAggregation(writer, appFinished, finishedContainers.contains(container));
            if (uploadedFilePathsInThisCycle.size() > 0) {
                uploadedLogsInThisCycle = true;
                // Hand the uploaded paths to the deletion service.
                this.delService.delete(this.userUgi.getShortUserName(), null, uploadedFilePathsInThisCycle.toArray(new Path[uploadedFilePathsInThisCycle.size()]));
            }
            // This container is in finishedContainers, so
            // remove it from containerLogAggregators.
            if (finishedContainers.contains(container)) {
                containerLogAggregators.remove(container);
            }
        }
        // In rolling mode, clean up old logs after a cycle that uploaded
        // something; presumably this keeps the number of existing logs
        // smaller than the configured NM log aggregation retention size.
        if (uploadedLogsInThisCycle && logAggregationInRolling) {
            cleanOldLogs();
            cleanupOldLogTimes++;
        }
        // Close the writer before the temporary file is renamed or deleted;
        // nulling it keeps the finally block from closing it a second time.
        if (writer != null) {
            writer.close();
            writer = null;
        }
        long currentTime = System.currentTimeMillis();
        // When rollingMonitorInterval is positive, the final file name gets a
        // "_<currentTime>" suffix.
        final Path renamedPath = this.rollingMonitorInterval <= 0 ? remoteNodeLogFileForApp : new Path(remoteNodeLogFileForApp.getParent(), remoteNodeLogFileForApp.getName() + "_" + currentTime);
        final boolean rename = uploadedLogsInThisCycle;
        try {
            userUgi.doAs(new PrivilegedExceptionAction<Object>() {

                @Override
                public Object run() throws Exception {
                    FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf);
                    // NOTE(review): the results of rename() and delete() are
                    // ignored, so only a thrown exception is treated as a
                    // failure here -- confirm this is intended.
                    if (rename) {
                        remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath);
                    } else {
                        remoteFS.delete(remoteNodeTmpLogFileForApp, false);
                    }
                    return null;
                }
            });
            diagnosticMessage = "Log uploaded successfully for Application: " + appId + " in NodeManager: " + LogAggregationUtils.getNodeString(nodeId) + " at " + Times.format(currentTime) + "\n";
        } catch (Exception e) {
            LOG.error("Failed to move temporary log file to final location: [" + remoteNodeTmpLogFileForApp + "] to [" + renamedPath + "]", e);
            diagnosticMessage = "Log uploaded failed for Application: " + appId + " in NodeManager: " + LogAggregationUtils.getNodeString(nodeId) + " at " + Times.format(currentTime) + "\n";
            renameTemporaryLogFileFailed = true;
            logAggregationSucceedInThisCycle = false;
        }
    } finally {
        // Per-cycle report: RUNNING when this cycle had no failure,
        // RUNNING_WITH_FAILURE otherwise.
        LogAggregationStatus logAggregationStatus = logAggregationSucceedInThisCycle ? LogAggregationStatus.RUNNING : LogAggregationStatus.RUNNING_WITH_FAILURE;
        sendLogAggregationReport(logAggregationStatus, diagnosticMessage);
        if (appFinished) {
            // If the app is finished, one extra final report with log aggregation
            // status SUCCEEDED/FAILED will be sent to RM to inform the RM
            // that the log aggregation in this NM is completed.
            LogAggregationStatus finalLogAggregationStatus = renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle ? LogAggregationStatus.FAILED : LogAggregationStatus.SUCCEEDED;
            sendLogAggregationReport(finalLogAggregationStatus, "");
        }
        // NOTE(review): the writer is closed only after the reports are sent;
        // if sendLogAggregationReport throws, a still-open writer is not
        // closed -- confirm whether that can happen.
        if (writer != null) {
            writer.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ContainerType(org.apache.hadoop.yarn.server.api.ContainerType) ContainerLogContext(org.apache.hadoop.yarn.server.api.ContainerLogContext) IOException(java.io.IOException) IOException(java.io.IOException) UnsupportedFileSystemException(org.apache.hadoop.fs.UnsupportedFileSystemException) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) LogWriter(org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter) FileSystem(org.apache.hadoop.fs.FileSystem) LogAggregationStatus(org.apache.hadoop.yarn.api.records.LogAggregationStatus) Credentials(org.apache.hadoop.security.Credentials) HashSet(java.util.HashSet)

Aggregations

LogAggregationStatus (org.apache.hadoop.yarn.api.records.LogAggregationStatus)6 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)2 NodeId (org.apache.hadoop.yarn.api.records.NodeId)2 LogAggregationReport (org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 UnsupportedFileSystemException (org.apache.hadoop.fs.UnsupportedFileSystemException)1 Credentials (org.apache.hadoop.security.Credentials)1 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)1 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)1 ApplicationResourceUsageReport (org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport)1 ApplicationTimeout (org.apache.hadoop.yarn.api.records.ApplicationTimeout)1