Use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in the Apache Hadoop project.
Class HadoopArchiveLogs, method filterAppsByAggregatedStatus.
/**
 * Removes from {@code eligibleApplications} every application whose log
 * aggregation is not known to have completed, and records the finish time
 * (taken from the ResourceManager's report) on the applications that remain.
 * <p>
 * An application is removed when its reported aggregation status is
 * {@code RUNNING}, {@code RUNNING_WITH_FAILURE}, {@code NOT_START},
 * {@code DISABLED} or {@code FAILED}, or when the report carries no status
 * at all ({@code null}). Applications the ResourceManager no longer knows
 * about are kept, on the assumption that their aggregation has finished.
 *
 * @throws IOException if talking to the ResourceManager fails
 * @throws YarnException if the ResourceManager reports an error other than
 *         the application not being found
 */
@VisibleForTesting
void filterAppsByAggregatedStatus() throws IOException, YarnException {
  YarnClient client = YarnClient.createYarnClient();
  try {
    client.init(getConf());
    client.start();
    // Explicit iterator so that entries can be removed while iterating.
    for (Iterator<AppInfo> it = eligibleApplications.iterator(); it.hasNext(); ) {
      AppInfo app = it.next();
      try {
        ApplicationReport report = client.getApplicationReport(ApplicationId.fromString(app.getAppId()));
        LogAggregationStatus aggStatus = report.getLogAggregationStatus();
        // The report may carry no aggregation status at all. A missing
        // status is handled like an unfinished aggregation: the app is
        // skipped rather than dereferencing null and aborting the whole run.
        boolean aggregationNotDone = aggStatus == null
            || aggStatus == LogAggregationStatus.RUNNING
            || aggStatus == LogAggregationStatus.RUNNING_WITH_FAILURE
            || aggStatus == LogAggregationStatus.NOT_START
            || aggStatus == LogAggregationStatus.DISABLED
            || aggStatus == LogAggregationStatus.FAILED;
        if (aggregationNotDone) {
          if (verbose) {
            LOG.info("Skipping " + app.getAppId() + " due to aggregation status being " + aggStatus);
          }
          it.remove();
        } else {
          if (verbose) {
            LOG.info(app.getAppId() + " has aggregation status " + aggStatus);
          }
          app.setFinishTime(report.getFinishTime());
        }
      } catch (ApplicationNotFoundException e) {
        // Assume the aggregation has finished: the app stays eligible.
        if (verbose) {
          LOG.info(app.getAppId() + " not in the ResourceManager");
        }
      }
    }
  } finally {
    // Always release the client, even when a report lookup failed.
    if (client != null) {
      client.stop();
    }
  }
}
Use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in the Apache Hadoop project.
Class AppBlock, method generateOverviewTable.
/**
* Generate overview table for app web page.
* @param app app info.
* @param schedulerPath schedule path.
* @param webUiType web ui type.
* @param appReport app report.
*/
private void generateOverviewTable(AppInfo app, String schedulerPath, String webUiType, ApplicationReport appReport) {
ResponseInfo overviewTable = info("Application Overview")._("User:", schedulerPath, app.getUser())._("Name:", app.getName())._("Application Type:", app.getType())._("Application Tags:", app.getApplicationTags() == null ? "" : app.getApplicationTags())._("Application Priority:", clarifyAppPriority(app.getPriority()))._("YarnApplicationState:", app.getAppState() == null ? UNAVAILABLE : clarifyAppState(app.getAppState()))._("Queue:", schedulerPath, app.getQueue())._("FinalStatus Reported by AM:", clairfyAppFinalStatus(app.getFinalAppStatus()))._("Started:", Times.format(app.getStartedTime()))._("Elapsed:", StringUtils.formatTime(Times.elapsed(app.getStartedTime(), app.getFinishedTime())))._("Tracking URL:", app.getTrackingUrl() == null || app.getTrackingUrl().equals(UNAVAILABLE) ? null : root_url(app.getTrackingUrl()), app.getTrackingUrl() == null || app.getTrackingUrl().equals(UNAVAILABLE) ? "Unassigned" : app.getAppState() == YarnApplicationState.FINISHED || app.getAppState() == YarnApplicationState.FAILED || app.getAppState() == YarnApplicationState.KILLED ? "History" : "ApplicationMaster");
if (webUiType != null && webUiType.equals(YarnWebParams.RM_WEB_UI)) {
LogAggregationStatus status = getLogAggregationStatus();
if (status == null) {
overviewTable._("Log Aggregation Status:", "N/A");
} else if (status == LogAggregationStatus.DISABLED || status == LogAggregationStatus.NOT_START || status == LogAggregationStatus.SUCCEEDED) {
overviewTable._("Log Aggregation Status:", status.name());
} else {
overviewTable._("Log Aggregation Status:", root_url("logaggregationstatus", app.getAppId()), status.name());
}
long timeout = appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME).getRemainingTime();
if (timeout < 0) {
overviewTable._("Application Timeout (Remaining Time):", "Unlimited");
} else {
overviewTable._("Application Timeout (Remaining Time):", String.format("%d seconds", timeout));
}
}
overviewTable._("Diagnostics:", app.getDiagnosticsInfo() == null ? "" : app.getDiagnosticsInfo());
overviewTable._("Unmanaged Application:", app.isUnmanagedApp());
overviewTable._("Application Node Label expression:", app.getAppNodeLabelExpression() == null ? "<Not set>" : app.getAppNodeLabelExpression());
overviewTable._("AM container Node Label expression:", app.getAmNodeLabelExpression() == null ? "<Not set>" : app.getAmNodeLabelExpression());
}
Use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in the Apache Hadoop project.
Class RMAppImpl, method createAndGetApplicationReport.
/**
 * Builds an {@link ApplicationReport} describing the current state of this
 * application. The whole method runs while holding the read lock.
 *
 * @param clientUserName name of the requesting user; used to create the
 *        client-to-AM token and to decide whether the AMRM token is included
 * @param allowAccess when {@code false}, the attempt-derived details
 *        (tracking URLs, host, RPC port, tokens, resource usage, progress,
 *        diagnostics, log aggregation status) keep the placeholder values
 *        they are initialized with below
 * @return the newly built report
 */
@Override
public ApplicationReport createAndGetApplicationReport(String clientUserName, boolean allowAccess) {
this.readLock.lock();
try {
// Placeholder values, used as-is when access is not allowed or there is
// no current attempt.
ApplicationAttemptId currentApplicationAttemptId = null;
org.apache.hadoop.yarn.api.records.Token clientToAMToken = null;
String trackingUrl = UNAVAILABLE;
String host = UNAVAILABLE;
String origTrackingUrl = UNAVAILABLE;
LogAggregationStatus logAggregationStatus = null;
int rpcPort = -1;
ApplicationResourceUsageReport appUsageReport = RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT;
FinalApplicationStatus finishState = getFinalApplicationStatus();
String diags = UNAVAILABLE;
float progress = 0.0f;
org.apache.hadoop.yarn.api.records.Token amrmToken = null;
if (allowAccess) {
trackingUrl = getDefaultProxyTrackingUrl();
if (this.currentAttempt != null) {
// Prefer the values of the current attempt over the defaults.
currentApplicationAttemptId = this.currentAttempt.getAppAttemptId();
trackingUrl = this.currentAttempt.getTrackingUrl();
origTrackingUrl = this.currentAttempt.getOriginalTrackingUrl();
if (UserGroupInformation.isSecurityEnabled()) {
// get a token so the client can communicate with the app attempt
// NOTE: token may be unavailable if the attempt is not running
Token<ClientToAMTokenIdentifier> attemptClientToAMToken = this.currentAttempt.createClientToken(clientUserName);
if (attemptClientToAMToken != null) {
clientToAMToken = BuilderUtils.newClientToAMToken(attemptClientToAMToken.getIdentifier(), attemptClientToAMToken.getKind().toString(), attemptClientToAMToken.getPassword(), attemptClientToAMToken.getService().toString());
}
}
host = this.currentAttempt.getHost();
rpcPort = this.currentAttempt.getRpcPort();
appUsageReport = currentAttempt.getApplicationResourceUsageReport();
progress = currentAttempt.getProgress();
logAggregationStatus = this.getLogAggregationStatusForAppReport();
}
// Original note: if the diagnostics is not already set, get it from the
// attempt (that fallback presumably lives inside getDiagnostics()).
diags = getDiagnostics().toString();
// The AMRM token is only handed out for an unmanaged AM, while the
// attempt is LAUNCHED, and only to the user who owns the application.
if (currentAttempt != null && currentAttempt.getAppAttemptState() == RMAppAttemptState.LAUNCHED) {
if (getApplicationSubmissionContext().getUnmanagedAM() && clientUserName != null && getUser().equals(clientUserName)) {
Token<AMRMTokenIdentifier> token = currentAttempt.getAMRMToken();
if (token != null) {
amrmToken = BuilderUtils.newAMRMToken(token.getIdentifier(), token.getKind().toString(), token.getPassword(), token.getService().toString());
}
}
}
// NOTE(review): when currentAttempt is null, appUsageReport still refers
// to RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT, so the setters
// below write into that object. Confirm whether it is shared across calls.
RMAppMetrics rmAppMetrics = getRMAppMetrics();
appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds());
appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds());
appUsageReport.setPreemptedMemorySeconds(rmAppMetrics.getPreemptedMemorySeconds());
appUsageReport.setPreemptedVcoreSeconds(rmAppMetrics.getPreemptedVcoreSeconds());
}
// No attempt id available: fall back to a dummy attempt id for this app.
if (currentApplicationAttemptId == null) {
currentApplicationAttemptId = BuilderUtils.newApplicationAttemptId(this.applicationId, DUMMY_APPLICATION_ATTEMPT_NUMBER);
}
ApplicationReport report = BuilderUtils.newApplicationReport(this.applicationId, currentApplicationAttemptId, this.user, this.queue, this.name, host, rpcPort, clientToAMToken, createApplicationState(), diags, trackingUrl, this.startTime, this.finishTime, finishState, appUsageReport, origTrackingUrl, progress, this.applicationType, amrmToken, applicationTags, this.getApplicationPriority());
// May be null: it is only populated when access is allowed and a current
// attempt exists.
report.setLogAggregationStatus(logAggregationStatus);
report.setUnmanagedApp(submissionContext.getUnmanagedAM());
report.setAppNodeLabelExpression(getAppNodeLabelExpression());
report.setAmNodeLabelExpression(getAmNodeLabelExpression());
// Default timeout entry, used when no application timeout is configured.
ApplicationTimeout timeout = ApplicationTimeout.newInstance(ApplicationTimeoutType.LIFETIME, UNLIMITED, UNKNOWN);
// Only the LIFETIME timeout type is handled here. When more timeout types
// are supported in YARN-5692, the below logic need to be changed.
if (!this.applicationTimeouts.isEmpty()) {
long timeoutInMillis = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME).longValue();
timeout.setExpiryTime(Times.formatISO8601(timeoutInMillis));
if (isAppInCompletedStates()) {
// if application configured with timeout and finished before timeout
// happens then remaining time should not be calculated.
timeout.setRemainingTime(0);
} else {
// Remaining time in seconds, never negative.
timeout.setRemainingTime(Math.max((timeoutInMillis - systemClock.getTime()) / 1000, 0));
}
}
report.setApplicationTimeouts(Collections.singletonMap(timeout.getTimeoutType(), timeout));
return report;
} finally {
this.readLock.unlock();
}
}
Use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in the Apache Hadoop project.
Class RMAppLogAggregationStatusBlock, method render.
/**
 * Renders the log aggregation page of one application: first a legend that
 * explains every {@link LogAggregationStatus} value, then a table with the
 * application's overall status and one row per node report.
 * <p>
 * When the application id request parameter is empty or cannot be parsed,
 * only an error message is written.
 */
@Override
protected void render(Block html) {
  String aid = $(APPLICATION_ID);
  if (aid.isEmpty()) {
    puts("Bad request: requires Application ID");
    return;
  }
  ApplicationId appId;
  try {
    appId = Apps.toAppID(aid);
  } catch (Exception e) {
    puts("Invalid Application ID: " + aid);
    return;
  }
  setTitle(join("Application ", aid));

  // Legend: one {status name, description} pair per LogAggregationStatus.
  String[][] statusLegend = {
      {LogAggregationStatus.DISABLED.name(), "Log Aggregation is Disabled."},
      {LogAggregationStatus.NOT_START.name(), "Log Aggregation does not Start."},
      {LogAggregationStatus.RUNNING.name(), "Log Aggregation is Running."},
      {LogAggregationStatus.RUNNING_WITH_FAILURE.name(), "Log Aggregation is Running, but has failures " + "in previous cycles"},
      {LogAggregationStatus.SUCCEEDED.name(), "Log Aggregation is Succeeded. All of the logs have been " + "aggregated successfully."},
      {LogAggregationStatus.FAILED.name(), "Log Aggregation is Failed. At least one of the logs " + "have not been aggregated."},
      {LogAggregationStatus.TIME_OUT.name(), "The application is finished, but the log aggregation status is " + "not updated for a long time. Not sure whether the log aggregation " + "is finished or not."},
  };
  DIV<Hamlet> legendDiv = html.div(_INFO_WRAP);
  TABLE<DIV<Hamlet>> legendTable = legendDiv.table("#LogAggregationStatusDecription");
  legendTable.tr().th(_TH, "Log Aggregation Status").th(_TH, "Description")._();
  for (String[] legendRow : statusLegend) {
    legendTable.tr().td(legendRow[0]).td(legendRow[1])._();
  }
  legendTable._();
  legendDiv._();

  RMApp rmApp = rm.getRMContext().getRMApps().get(appId);

  // Application log aggregation status table, headed by the overall status.
  DIV<Hamlet> div = html.div(_INFO_WRAP);
  String overallStatus = "N/A";
  if (rmApp != null && rmApp.getLogAggregationStatusForAppReport() != null) {
    overallStatus = rmApp.getLogAggregationStatusForAppReport().name();
  }
  TABLE<DIV<Hamlet>> table = div.h3("Log Aggregation: " + overallStatus).table("#LogAggregationStatus");

  int maxLogAggregationDiagnosticsInMemory = conf.getInt(YarnConfiguration.RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY, YarnConfiguration.DEFAULT_RM_MAX_LOG_AGGREGATION_DIAGNOSTICS_IN_MEMORY);
  String lastN = "Last " + maxLogAggregationDiagnosticsInMemory;
  table.tr().th(_TH, "NodeId").th(_TH, "Log Aggregation Status").th(_TH, lastN + " Diagnostic Messages").th(_TH, lastN + " Failure Messages")._();

  if (rmApp != null) {
    Map<NodeId, LogAggregationReport> logAggregationReports = rmApp.getLogAggregationReportsForApp();
    if (logAggregationReports != null && !logAggregationReports.isEmpty()) {
      for (Entry<NodeId, LogAggregationReport> nodeEntry : logAggregationReports.entrySet()) {
        NodeId node = nodeEntry.getKey();
        LogAggregationReport nodeReport = nodeEntry.getValue();
        // A node without a report shows "N/A" in every column.
        LogAggregationStatus nodeStatus = null;
        String diagnostics = null;
        String failures = null;
        if (nodeReport != null) {
          nodeStatus = nodeReport.getLogAggregationStatus();
          diagnostics = nodeReport.getDiagnosticMessage();
          failures = ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(node);
        }
        table.tr().td(node.toString()).td(nodeStatus == null ? "N/A" : nodeStatus.toString()).td(diagnostics == null ? "N/A" : diagnostics).td(failures == null ? "N/A" : failures)._();
      }
    }
  }
  table._();
  div._();
}
Use of org.apache.hadoop.yarn.api.records.LogAggregationStatus in the Apache Hadoop project.
Class AppLogAggregatorImpl, method uploadLogsForContainers.
/**
 * Runs one log aggregation cycle for this application: picks the containers
 * whose logs are uploaded in this cycle, writes their logs through a
 * {@code LogWriter}, then renames the temporary remote file to its final
 * name (or deletes it when nothing was uploaded) and reports the outcome
 * through {@code sendLogAggregationReport}.
 * <p>
 * Does nothing, and sends no report, when log aggregation is disabled.
 *
 * @param appFinished whether the application has finished; forwarded to each
 *        container's aggregator, and when {@code true} one extra final
 *        SUCCEEDED/FAILED report is sent
 */
private void uploadLogsForContainers(boolean appFinished) {
if (this.logAggregationDisabled) {
return;
}
if (UserGroupInformation.isSecurityEnabled()) {
Credentials systemCredentials = context.getSystemCredentialsForApps().get(appId);
if (systemCredentials != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding new framework-token for " + appId + " for log-aggregation: " + systemCredentials.getAllTokens() + "; userUgi=" + userUgi);
}
// this will replace old token
userUgi.addCredentials(systemCredentials);
}
}
// Create a set of Containers whose logs will be uploaded in this cycle.
// It includes:
// a) all containers in pendingContainers: those containers are finished
// and satisfy the ContainerLogAggregationPolicy.
// b) some set of running containers: For all the Running containers,
// we use exitCode of 0 to find those which satisfy the
// ContainerLogAggregationPolicy.
Set<ContainerId> pendingContainerInThisCycle = new HashSet<ContainerId>();
this.pendingContainers.drainTo(pendingContainerInThisCycle);
// Snapshot of the drained (finished) containers, taken before the running
// ones are added to pendingContainerInThisCycle below.
Set<ContainerId> finishedContainers = new HashSet<ContainerId>(pendingContainerInThisCycle);
// NOTE(review): the application is looked up twice (null check, then use);
// confirm it cannot be removed from the map between the two lookups.
if (this.context.getApplications().get(this.appId) != null) {
for (Container container : this.context.getApplications().get(this.appId).getContainers().values()) {
ContainerType containerType = container.getContainerTokenIdentifier().getContainerType();
if (shouldUploadLogs(new ContainerLogContext(container.getContainerId(), containerType, 0))) {
pendingContainerInThisCycle.add(container.getContainerId());
}
}
}
LogWriter writer = null;
String diagnosticMessage = "";
boolean logAggregationSucceedInThisCycle = true;
// Everything from here on runs inside try/finally, so the status report in
// the finally block is sent on the early-return paths as well.
try {
if (pendingContainerInThisCycle.isEmpty()) {
return;
}
logAggregationTimes++;
try {
writer = createLogWriter();
// Write ACLs once when the writer is created.
writer.writeApplicationACLs(appAcls);
writer.writeApplicationOwner(this.userUgi.getShortUserName());
} catch (IOException e1) {
logAggregationSucceedInThisCycle = false;
LOG.error("Cannot create writer for app " + this.applicationId + ". Skip log upload this time. ", e1);
return;
}
boolean uploadedLogsInThisCycle = false;
for (ContainerId container : pendingContainerInThisCycle) {
// Reuse the container's aggregator if one exists, otherwise create and
// register a new one.
ContainerLogAggregator aggregator = null;
if (containerLogAggregators.containsKey(container)) {
aggregator = containerLogAggregators.get(container);
} else {
aggregator = new ContainerLogAggregator(container);
containerLogAggregators.put(container, aggregator);
}
Set<Path> uploadedFilePathsInThisCycle = aggregator.doContainerLogAggregation(writer, appFinished, finishedContainers.contains(container));
if (uploadedFilePathsInThisCycle.size() > 0) {
uploadedLogsInThisCycle = true;
// Hand the uploaded local files to the deletion service.
this.delService.delete(this.userUgi.getShortUserName(), null, uploadedFilePathsInThisCycle.toArray(new Path[uploadedFilePathsInThisCycle.size()]));
}
// This container was drained from pendingContainers (it is finished), so
// remove it from containerLogAggregators.
if (finishedContainers.contains(container)) {
containerLogAggregators.remove(container);
}
}
// In rolling mode, clean old logs after an upload. Per the original
// (truncated) note, this keeps the number of existing logs smaller than
// the configured NM log aggregation retention size.
if (uploadedLogsInThisCycle && logAggregationInRolling) {
cleanOldLogs();
cleanupOldLogTimes++;
}
// Close the writer before the temporary file is renamed or deleted; it is
// nulled so the finally block does not close it a second time.
if (writer != null) {
writer.close();
writer = null;
}
long currentTime = System.currentTimeMillis();
// With rollingMonitorInterval <= 0 the final file keeps its plain name;
// otherwise "_" + the current timestamp is appended to the name.
final Path renamedPath = this.rollingMonitorInterval <= 0 ? remoteNodeLogFileForApp : new Path(remoteNodeLogFileForApp.getParent(), remoteNodeLogFileForApp.getName() + "_" + currentTime);
final boolean rename = uploadedLogsInThisCycle;
try {
// The remote file system is accessed as the application's user.
userUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
FileSystem remoteFS = remoteNodeLogFileForApp.getFileSystem(conf);
if (rename) {
// NOTE(review): the boolean result of rename() is ignored; confirm that
// a false return should not be treated as a failed upload.
remoteFS.rename(remoteNodeTmpLogFileForApp, renamedPath);
} else {
// Nothing was uploaded in this cycle: drop the temporary file.
remoteFS.delete(remoteNodeTmpLogFileForApp, false);
}
return null;
}
});
diagnosticMessage = "Log uploaded successfully for Application: " + appId + " in NodeManager: " + LogAggregationUtils.getNodeString(nodeId) + " at " + Times.format(currentTime) + "\n";
} catch (Exception e) {
LOG.error("Failed to move temporary log file to final location: [" + remoteNodeTmpLogFileForApp + "] to [" + renamedPath + "]", e);
diagnosticMessage = "Log uploaded failed for Application: " + appId + " in NodeManager: " + LogAggregationUtils.getNodeString(nodeId) + " at " + Times.format(currentTime) + "\n";
renameTemporaryLogFileFailed = true;
logAggregationSucceedInThisCycle = false;
}
} finally {
// Per-cycle report: RUNNING when this cycle had no failure, otherwise
// RUNNING_WITH_FAILURE.
LogAggregationStatus logAggregationStatus = logAggregationSucceedInThisCycle ? LogAggregationStatus.RUNNING : LogAggregationStatus.RUNNING_WITH_FAILURE;
sendLogAggregationReport(logAggregationStatus, diagnosticMessage);
if (appFinished) {
// If the app is finished, one extra final report with log aggregation
// status SUCCEEDED/FAILED will be sent to RM to inform the RM
// that the log aggregation in this NM is completed.
LogAggregationStatus finalLogAggregationStatus = renameTemporaryLogFileFailed || !logAggregationSucceedInThisCycle ? LogAggregationStatus.FAILED : LogAggregationStatus.SUCCEEDED;
sendLogAggregationReport(finalLogAggregationStatus, "");
}
// Still non-null only if the try block was left before the regular close
// above.
if (writer != null) {
writer.close();
}
}
}
Aggregations