Use of org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer in project hadoop by apache.
The class JobHistoryEventHandler, method serviceInit.
/* (non-Javadoc)
 * @see org.apache.hadoop.yarn.service.AbstractService#init(org.apache.hadoop.conf.Configuration)
 * Initializes the FileSystem and Path objects for the log and done directories.
 * Creates these directories if they do not already exist.
 */
@Override
protected void serviceInit(Configuration conf) throws Exception {
String jobId = TypeConverter.fromYarn(context.getApplicationID()).toString();
String stagingDirStr = null;
String doneDirStr = null;
String userDoneDirStr = null;
try {
stagingDirStr = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId);
doneDirStr = JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
userDoneDirStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
} catch (IOException e) {
LOG.error("Failed while getting the configured log directories", e);
throw new YarnRuntimeException(e);
}
//Check for the existence of the history staging dir. Maybe create it.
try {
stagingDirPath = FileContext.getFileContext(conf).makeQualified(new Path(stagingDirStr));
stagingDirFS = FileSystem.get(stagingDirPath.toUri(), conf);
mkdir(stagingDirFS, stagingDirPath, new FsPermission(JobHistoryUtils.HISTORY_STAGING_DIR_PERMISSIONS));
} catch (IOException e) {
LOG.error("Failed while checking for/creating history staging path: [" + stagingDirPath + "]", e);
throw new YarnRuntimeException(e);
}
//Check for the existence of intermediate done dir.
Path doneDirPath = null;
try {
doneDirPath = FileContext.getFileContext(conf).makeQualified(new Path(doneDirStr));
doneDirFS = FileSystem.get(doneDirPath.toUri(), conf);
// This directory should ideally be created by the JobHistoryServer or as
// part of deployment; create it here only if the conf allows it.
if (!doneDirFS.exists(doneDirPath)) {
if (JobHistoryUtils.shouldCreateNonUserDirectory(conf)) {
LOG.info("Creating intermediate history logDir: [" + doneDirPath + "] + based on conf. Should ideally be created by the JobHistoryServer: " + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR);
mkdir(doneDirFS, doneDirPath, new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
// TODO Temporary toShort till new FsPermission(FsPermissions) respects sticky
} else {
String message = "Not creating intermediate history logDir: [" + doneDirPath + "] based on conf: " + MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR + ". Either set to true or pre-create this directory with" + " appropriate permissions";
LOG.error(message);
throw new YarnRuntimeException(message);
}
}
} catch (IOException e) {
LOG.error("Failed checking for the existance of history intermediate " + "done directory: [" + doneDirPath + "]");
throw new YarnRuntimeException(e);
}
//Check/create user directory under intermediate done dir.
try {
doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(new Path(userDoneDirStr));
mkdir(doneDirFS, doneDirPrefixPath, new FsPermission(JobHistoryUtils.HISTORY_INTERMEDIATE_USER_DIR_PERMISSIONS));
} catch (IOException e) {
LOG.error("Error creating user intermediate history done directory: [ " + doneDirPrefixPath + "]", e);
throw new YarnRuntimeException(e);
}
// Maximum number of unflushed completion-events that can stay in the queue
// before flush kicks in.
maxUnflushedCompletionEvents = conf.getInt(MRJobConfig.MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS, MRJobConfig.DEFAULT_MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS);
// We want to cut down on flushes after the job completes so writing finishes
// quicker; maxUnflushedEvents is therefore increased post job completion
// using the following multiplier.
postJobCompletionMultiplier = conf.getInt(MRJobConfig.MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER, MRJobConfig.DEFAULT_MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER);
// Maximum time for which a flush can be deferred.
flushTimeout = conf.getLong(MRJobConfig.MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS, MRJobConfig.DEFAULT_MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS);
minQueueSizeForBatchingFlushes = conf.getInt(MRJobConfig.MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD, MRJobConfig.DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD);
// Timeline service configuration status: off, on_with_v1, or on_with_v2.
if (conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_EMIT_TIMELINE_DATA, MRJobConfig.DEFAULT_MAPREDUCE_JOB_EMIT_TIMELINE_DATA)) {
LOG.info("Emitting job history data to the timeline service is enabled");
if (YarnConfiguration.timelineServiceEnabled(conf)) {
boolean timelineServiceV2Enabled = ((int) YarnConfiguration.getTimelineServiceVersion(conf) == 2);
if (timelineServiceV2Enabled) {
timelineV2Client = ((MRAppMaster.RunningAppContext) context).getTimelineV2Client();
timelineV2Client.init(conf);
} else {
timelineClient = ((MRAppMaster.RunningAppContext) context).getTimelineClient();
timelineClient.init(conf);
}
LOG.info("Timeline service is enabled; version: " + YarnConfiguration.getTimelineServiceVersion(conf));
} else {
LOG.info("Timeline service is not enabled");
}
} else {
LOG.info("Emitting job history data to the timeline server is not " + "enabled");
}
// Determine the .jhist file format to write: json or binary.
String jhistFormat = conf.get(JHAdminConfig.MR_HS_JHIST_FORMAT, JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT);
if (jhistFormat.equals("json")) {
jhistMode = EventWriter.WriteMode.JSON;
} else if (jhistFormat.equals("binary")) {
jhistMode = EventWriter.WriteMode.BINARY;
} else {
LOG.warn("Unrecognized value '" + jhistFormat + "' for property " + JHAdminConfig.MR_HS_JHIST_FORMAT + ". Valid values are " + "'json' or 'binary'. Falling back to default value '" + JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT + "'.");
}
super.serviceInit(conf);
}
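The flush-tuning and format settings read above are ordinary Configuration entries, so they can be overridden per job. A minimal sketch, using only the configuration constants that serviceInit reads above; the values are illustrative, not recommendations.
// Hedged sketch: tune the history-flush behaviour and .jhist format.
Configuration conf = new Configuration();
// Allow more unflushed completion events before a flush kicks in.
conf.setInt(MRJobConfig.MR_AM_HISTORY_MAX_UNFLUSHED_COMPLETE_EVENTS, 400);
// Defer flushes even more aggressively once the job has completed.
conf.setInt(MRJobConfig.MR_AM_HISTORY_JOB_COMPLETE_UNFLUSHED_MULTIPLIER, 50);
// Upper bound on how long a flush may be deferred.
conf.setLong(MRJobConfig.MR_AM_HISTORY_COMPLETE_EVENT_FLUSH_TIMEOUT_MS, 30 * 1000L);
// Queue size at which batched flushing starts being used.
conf.setInt(MRJobConfig.MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD, 50);
// Also emit history data to the timeline service, and write JSON .jhist files.
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_EMIT_TIMELINE_DATA, true);
conf.set(JHAdminConfig.MR_HS_JHIST_FORMAT, "json");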
Use of org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer in project hadoop by apache.
The class JHEventHandlerForSigtermTest, method testSigTermedFunctionality.
/**
 * Tests that in case of SIGTERM, the JHEH stops without processing its event
 * queue (because we must stop quickly lest we get SIGKILLed) and processes
 * a JobUnsuccessfulEvent for jobs which were still running (so that they may
 * show up in the JobHistoryServer).
 */
@Test
public void testSigTermedFunctionality() throws IOException {
AppContext mockedContext = Mockito.mock(AppContext.class);
JHEventHandlerForSigtermTest jheh = new JHEventHandlerForSigtermTest(mockedContext, 0);
JobId jobId = Mockito.mock(JobId.class);
jheh.addToFileMap(jobId);
//Submit 4 events and check that they're handled in the absence of a signal
final int numEvents = 4;
JobHistoryEvent[] events = new JobHistoryEvent[numEvents];
for (int i = 0; i < numEvents; ++i) {
events[i] = getEventToEnqueue(jobId);
jheh.handle(events[i]);
}
jheh.stop();
//Make sure events were handled
assertTrue("handleEvent should've been called only 4 times but was " + jheh.eventsHandled, jheh.eventsHandled == 4);
//Create a new jheh because the last stop closed the eventWriter etc.
jheh = new JHEventHandlerForSigtermTest(mockedContext, 0);
// Make constructor of JobUnsuccessfulCompletionEvent pass
Job job = Mockito.mock(Job.class);
Mockito.when(mockedContext.getJob(jobId)).thenReturn(job);
// Make TypeConverter(JobID) pass
ApplicationId mockAppId = Mockito.mock(ApplicationId.class);
Mockito.when(mockAppId.getClusterTimestamp()).thenReturn(1000L);
Mockito.when(jobId.getAppId()).thenReturn(mockAppId);
jheh.addToFileMap(jobId);
jheh.setForcejobCompletion(true);
for (int i = 0; i < numEvents; ++i) {
events[i] = getEventToEnqueue(jobId);
jheh.handle(events[i]);
}
jheh.stop();
//Make sure events were handled, 4 + 1 finish event
assertTrue("handleEvent should've been called only 5 times but was " + jheh.eventsHandled, jheh.eventsHandled == 5);
assertTrue("Last event handled wasn't JobUnsuccessfulCompletionEvent", jheh.lastEventHandled.getHistoryEvent() instanceof JobUnsuccessfulCompletionEvent);
}
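Outside the test, the forced-completion flag is what the ApplicationMaster sets on its last retry so that still-running jobs get a JobUnsuccessfulCompletionEvent. A hedged sketch of that shutdown path follows; the hook body is illustrative, and MRAppMaster wires this up differently in detail.
// Hedged sketch: on SIGTERM, force job completion so unfinished jobs are
// recorded, then stop without draining the whole event queue.
final JobHistoryEventHandler handler = jheh;
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
  @Override
  public void run() {
    handler.setForcejobCompletion(true);
    handler.stop();
  }
}));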
Use of org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer in project hadoop by apache.
The class JobHistoryServer, method serviceInit.
@Override
protected void serviceInit(Configuration conf) throws Exception {
Configuration config = new YarnConfiguration(conf);
config.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
// This is required for WebApps to use https if enabled.
MRWebAppUtil.initialize(getConfig());
try {
doSecureLogin(conf);
} catch (IOException ie) {
throw new YarnRuntimeException("History Server Failed to login", ie);
}
jobHistoryService = new JobHistory();
historyContext = (HistoryContext) jobHistoryService;
stateStore = createStateStore(conf);
this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
clientService = createHistoryClientService();
aggLogDelService = new AggregatedLogDeletionService();
hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
addService(stateStore);
addService(new HistoryServerSecretManagerService());
addService(jobHistoryService);
addService(clientService);
addService(aggLogDelService);
addService(hsAdminServer);
DefaultMetricsSystem.initialize("JobHistoryServer");
JvmMetrics jm = JvmMetrics.initSingleton("JobHistoryServer", null);
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
super.serviceInit(config);
}
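The tests below run the server embedded by driving this lifecycle directly. A minimal sketch, assuming a default YarnConfiguration; production deployments typically launch the daemon via the mapred historyserver command instead.
// Minimal sketch: run a JobHistoryServer embedded.
Configuration conf = new YarnConfiguration();
JobHistoryServer historyServer = new JobHistoryServer();
try {
  historyServer.init(conf);  // invokes serviceInit above
  historyServer.start();     // starts the added child services
  // ... interact through historyServer.getClientService() ...
} finally {
  historyServer.stop();
}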
Use of org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer in project hadoop by apache.
The class TestJobHistoryServer, method testReports.
// Tests the reports of the JobHistoryServer. The history server should get the log files from MRApp and read them.
@Test(timeout = 50000)
public void testReports() throws Exception {
Configuration config = new Configuration();
config.setClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class);
RackResolver.init(config);
MRApp app = new MRAppWithHistory(1, 1, true, this.getClass().getName(), true);
app.submit(config);
Job job = app.getContext().getAllJobs().values().iterator().next();
app.waitForState(job, JobState.SUCCEEDED);
historyServer = new JobHistoryServer();
historyServer.init(config);
historyServer.start();
// Find the JobHistory service among the history server's services.
JobHistory jobHistory = null;
for (Service service : historyServer.getServices()) {
if (service instanceof JobHistory) {
jobHistory = (JobHistory) service;
}
}
Map<JobId, Job> jobs = jobHistory.getAllJobs();
assertEquals(1, jobs.size());
assertEquals("job_0_0000", jobs.keySet().iterator().next().toString());
Task task = job.getTasks().values().iterator().next();
TaskAttempt attempt = task.getAttempts().values().iterator().next();
HistoryClientService historyService = historyServer.getClientService();
MRClientProtocol protocol = historyService.getClientHandler();
GetTaskAttemptReportRequest gtarRequest = recordFactory.newRecordInstance(GetTaskAttemptReportRequest.class);
// test getTaskAttemptReport
TaskAttemptId taId = attempt.getID();
taId.setTaskId(task.getID());
taId.getTaskId().setJobId(job.getID());
gtarRequest.setTaskAttemptId(taId);
GetTaskAttemptReportResponse response = protocol.getTaskAttemptReport(gtarRequest);
assertEquals("container_0_0000_01_000000", response.getTaskAttemptReport().getContainerId().toString());
assertTrue(response.getTaskAttemptReport().getDiagnosticInfo().isEmpty());
// counters
assertNotNull(response.getTaskAttemptReport().getCounters().getCounter(TaskCounter.PHYSICAL_MEMORY_BYTES));
assertEquals(taId.toString(), response.getTaskAttemptReport().getTaskAttemptId().toString());
// test getTaskReport
GetTaskReportRequest request = recordFactory.newRecordInstance(GetTaskReportRequest.class);
TaskId taskId = task.getID();
taskId.setJobId(job.getID());
request.setTaskId(taskId);
GetTaskReportResponse reportResponse = protocol.getTaskReport(request);
assertEquals("", reportResponse.getTaskReport().getDiagnosticsList().iterator().next());
// progress
assertEquals(1.0f, reportResponse.getTaskReport().getProgress(), 0.01);
// report has corrected taskId
assertEquals(taskId.toString(), reportResponse.getTaskReport().getTaskId().toString());
// Task state should be SUCCEEDED
assertEquals(TaskState.SUCCEEDED, reportResponse.getTaskReport().getTaskState());
// For invalid jobid, throw IOException
GetTaskReportsRequest gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
gtreportsRequest.setJobId(TypeConverter.toYarn(JobID.forName("job_1415730144495_0001")));
gtreportsRequest.setTaskType(TaskType.REDUCE);
try {
protocol.getTaskReports(gtreportsRequest);
fail("IOException not thrown for invalid job id");
} catch (IOException e) {
// Expected
}
// test getTaskAttemptCompletionEvents
GetTaskAttemptCompletionEventsRequest taskAttemptRequest = recordFactory.newRecordInstance(GetTaskAttemptCompletionEventsRequest.class);
taskAttemptRequest.setJobId(job.getID());
GetTaskAttemptCompletionEventsResponse taskAttemptCompletionEventsResponse = protocol.getTaskAttemptCompletionEvents(taskAttemptRequest);
assertEquals(0, taskAttemptCompletionEventsResponse.getCompletionEventCount());
// test getDiagnostics
GetDiagnosticsRequest diagnosticRequest = recordFactory.newRecordInstance(GetDiagnosticsRequest.class);
diagnosticRequest.setTaskAttemptId(taId);
GetDiagnosticsResponse diagnosticResponse = protocol.getDiagnostics(diagnosticRequest);
// Oddly, the history contains a single empty diagnostic string.
assertEquals(1, diagnosticResponse.getDiagnosticsCount());
assertEquals("", diagnosticResponse.getDiagnostics(0));
}
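The same recordFactory pattern extends to job-level queries. A short sketch, not part of the original test, of fetching the job report through the same protocol handler:
// Hedged sketch: fetch the job-level report with the same MRClientProtocol.
GetJobReportRequest jobRequest = recordFactory.newRecordInstance(GetJobReportRequest.class);
jobRequest.setJobId(job.getID());
GetJobReportResponse jobResponse = protocol.getJobReport(jobRequest);
assertEquals(JobState.SUCCEEDED, jobResponse.getJobReport().getJobState());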
Use of org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer in project hadoop by apache.
The class TestJHSSecurity, method testDelegationToken.
@Test
public void testDelegationToken() throws IOException, InterruptedException {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
final YarnConfiguration conf = new YarnConfiguration(new JobConf());
// Just a random principal
conf.set(JHAdminConfig.MR_HISTORY_PRINCIPAL, "RandomOrc/localhost@apache.org");
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
final long initialInterval = 10000L;
final long maxLifetime = 20000L;
final long renewInterval = 10000L;
JobHistoryServer jobHistoryServer = null;
MRClientProtocol clientUsingDT = null;
long tokenFetchTime;
try {
jobHistoryServer = new JobHistoryServer() {
@Override
protected void doSecureLogin(Configuration conf) throws IOException {
// No keytab-based login for this test.
}
@Override
protected JHSDelegationTokenSecretManager createJHSSecretManager(Configuration conf, HistoryServerStateStoreService store) {
return new JHSDelegationTokenSecretManager(initialInterval, maxLifetime, renewInterval, 3600000, store);
}
@Override
protected HistoryClientService createHistoryClientService() {
return new HistoryClientService(historyContext, this.jhsDTSecretManager) {
@Override
protected void initializeWebApp(Configuration conf) {
// Not needed for this test; skip.
}
};
}
};
jobHistoryServer.init(conf);
jobHistoryServer.start();
final MRClientProtocol hsService = jobHistoryServer.getClientService().getClientHandler();
// Fake the authentication-method
UserGroupInformation loggedInUser = UserGroupInformation.createRemoteUser("testrenewer@APACHE.ORG");
Assert.assertEquals("testrenewer", loggedInUser.getShortUserName());
// Default realm is APACHE.ORG
loggedInUser.setAuthenticationMethod(AuthenticationMethod.KERBEROS);
Token token = getDelegationToken(loggedInUser, hsService, loggedInUser.getShortUserName());
tokenFetchTime = System.currentTimeMillis();
LOG.info("Got delegation token at: " + tokenFetchTime);
// Now try talking to JHS using the delegation token
clientUsingDT = getMRClientProtocol(token, jobHistoryServer.getClientService().getBindAddress(), "TheDarkLord", conf);
GetJobReportRequest jobReportRequest = Records.newRecord(GetJobReportRequest.class);
jobReportRequest.setJobId(MRBuilderUtils.newJobId(123456, 1, 1));
try {
clientUsingDT.getJobReport(jobReportRequest);
} catch (IOException e) {
Assert.assertEquals("Unknown job job_123456_0001", e.getMessage());
}
// Renew after 50% of token age.
while (System.currentTimeMillis() < tokenFetchTime + initialInterval / 2) {
Thread.sleep(500L);
}
long nextExpTime = renewDelegationToken(loggedInUser, hsService, token);
long renewalTime = System.currentTimeMillis();
LOG.info("Renewed token at: " + renewalTime + ", NextExpiryTime: " + nextExpTime);
// Wait for first expiry, but before renewed expiry.
while (System.currentTimeMillis() > tokenFetchTime + initialInterval && System.currentTimeMillis() < nextExpTime) {
Thread.sleep(500L);
}
Thread.sleep(50L);
// Valid token because of renewal.
try {
clientUsingDT.getJobReport(jobReportRequest);
} catch (IOException e) {
Assert.assertEquals("Unknown job job_123456_0001", e.getMessage());
}
// Wait for expiry.
while (System.currentTimeMillis() < renewalTime + renewInterval) {
Thread.sleep(500L);
}
Thread.sleep(50L);
LOG.info("At time: " + System.currentTimeMillis() + ", token should be invalid");
// Token should have expired.
try {
clientUsingDT.getJobReport(jobReportRequest);
fail("Should not have succeeded with an expired token");
} catch (IOException e) {
assertTrue(e.getCause().getMessage().contains("is expired"));
}
// Stop the existing proxy, start another.
if (clientUsingDT != null) {
// RPC.stopProxy(clientUsingDT);
clientUsingDT = null;
}
token = getDelegationToken(loggedInUser, hsService, loggedInUser.getShortUserName());
tokenFetchTime = System.currentTimeMillis();
LOG.info("Got delegation token at: " + tokenFetchTime);
// Now try talking to HSService using the delegation token
clientUsingDT = getMRClientProtocol(token, jobHistoryServer.getClientService().getBindAddress(), "loginuser2", conf);
try {
clientUsingDT.getJobReport(jobReportRequest);
} catch (IOException e) {
fail("Unexpected exception" + e);
}
cancelDelegationToken(loggedInUser, hsService, token);
// Test cancelling a token that was obtained with a different renewer.
Token tokenWithDifferentRenewer = getDelegationToken(loggedInUser, hsService, "yarn");
cancelDelegationToken(loggedInUser, hsService, tokenWithDifferentRenewer);
if (clientUsingDT != null) {
// RPC.stopProxy(clientUsingDT);
clientUsingDT = null;
}
// Creating a new connection.
clientUsingDT = getMRClientProtocol(token, jobHistoryServer.getClientService().getBindAddress(), "loginuser2", conf);
LOG.info("Cancelled delegation token at: " + System.currentTimeMillis());
// Verify cancellation worked.
try {
clientUsingDT.getJobReport(jobReportRequest);
fail("Should not have succeeded with a cancelled delegation token");
} catch (IOException e) {
// Expected: the token was cancelled.
}
} finally {
jobHistoryServer.stop();
}
}
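The getDelegationToken, renewDelegationToken, and cancelDelegationToken helpers used above are not shown. A hedged sketch of the first, with its structure assumed from the call sites rather than copied from the test:
// Hedged sketch: obtain an HS delegation token on behalf of the logged-in user.
private Token getDelegationToken(final UserGroupInformation loggedInUser,
    final MRClientProtocol hsService, final String renewerString)
    throws IOException, InterruptedException {
  return loggedInUser.doAs(new PrivilegedExceptionAction<Token>() {
    @Override
    public Token run() throws IOException {
      GetDelegationTokenRequest request = Records.newRecord(GetDelegationTokenRequest.class);
      request.setRenewer(renewerString);
      return hsService.getDelegationToken(request).getDelegationToken();
    }
  });
}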