Use of org.apache.hadoop.mapreduce.TaskAttemptID in project druid by druid-io.
From the class HadoopDruidIndexerConfigTest, method shouldMakeHDFSCompliantSegmentOutputPath.
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath() {
HadoopIngestionSpec schema;
try {
schema = jsonReadWriteRead(
    "{\n"
    + " \"dataSchema\": {\n"
    + " \"dataSource\": \"source\",\n"
    + " \"metricsSpec\": [],\n"
    + " \"granularitySpec\": {\n"
    + " \"type\": \"uniform\",\n"
    + " \"segmentGranularity\": \"hour\",\n"
    + " \"intervals\": [\"2012-07-10/P1D\"]\n"
    + " }\n"
    + " },\n"
    + " \"ioConfig\": {\n"
    + " \"type\": \"hadoop\",\n"
    + " \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n"
    + " }\n"
    + "}",
    HadoopIngestionSpec.class
);
} catch (Exception e) {
throw Throwables.propagate(e);
}
HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version")));
Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30), 4712);
Path path = JobHelper.makeFileNamePath(
    new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
    new DistributedFileSystem(),
    new DataSegment(
        cfg.getSchema().getDataSchema().getDataSource(),
        cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
        cfg.getSchema().getTuningConfig().getVersion(),
        null,
        null,
        null,
        new NumberedShardSpec(bucket.partitionNum, 5000),
        -1,
        -1),
    JobHelper.INDEX_ZIP);
Assert.assertEquals("hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_index.zip", path.toString());
path = JobHelper.makeFileNamePath(
    new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
    new DistributedFileSystem(),
    new DataSegment(
        cfg.getSchema().getDataSchema().getDataSource(),
        cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
        cfg.getSchema().getTuningConfig().getVersion(),
        null,
        null,
        null,
        new NumberedShardSpec(bucket.partitionNum, 5000),
        -1,
        -1),
    JobHelper.DESCRIPTOR_JSON);
Assert.assertEquals("hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_descriptor.json", path.toString());
path = JobHelper.makeTmpPath(
    new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
    new DistributedFileSystem(),
    new DataSegment(
        cfg.getSchema().getDataSchema().getDataSource(),
        cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
        cfg.getSchema().getTuningConfig().getVersion(),
        null,
        null,
        null,
        new NumberedShardSpec(bucket.partitionNum, 5000),
        -1,
        -1),
    new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0));
Assert.assertEquals("hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version" + "/4712_index.zip.0", path.toString());
}
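For orientation, the TaskAttemptID passed to JobHelper.makeTmpPath above carries attempt number 0, and the last assertion ends in ".zip.0"; this suggests the temporary path is suffixed with the attempt id, though that reading is inferred from the assertion rather than from JobHelper's source. The standalone sketch below (class name and output format are illustrative assumptions) only shows how such a TaskAttemptID is built and roughly what its string form looks like.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdDemo {
  public static void main(String[] args) {
    // Same arguments as the makeTmpPath call above:
    // jtIdentifier "abc", job number 123, REDUCE task 1, attempt number 0.
    TaskAttemptID attemptId = new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0);
    System.out.println(attemptId);         // expected to look like attempt_abc_0123_r_000001_0 (padding assumed)
    System.out.println(attemptId.getId()); // 0, the attempt number, matching the ".0" suffix asserted above
  }
}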
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class MRAppMaster, method parsePreviousJobHistory.
private void parsePreviousJobHistory() throws IOException {
FSDataInputStream in = getPreviousJobHistoryStream(getConfig(), appAttemptID);
JobHistoryParser parser = new JobHistoryParser(in);
JobInfo jobInfo = parser.parse();
Exception parseException = parser.getParseException();
if (parseException != null) {
LOG.info("Got an error parsing job-history file" + ", ignoring incomplete events.", parseException);
}
Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = jobInfo.getAllTasks();
for (TaskInfo taskInfo : taskInfos.values()) {
if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) {
Iterator<Entry<TaskAttemptID, TaskAttemptInfo>> taskAttemptIterator = taskInfo.getAllTaskAttempts().entrySet().iterator();
while (taskAttemptIterator.hasNext()) {
Map.Entry<TaskAttemptID, TaskAttemptInfo> currentEntry = taskAttemptIterator.next();
if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) {
taskAttemptIterator.remove();
}
}
completedTasksFromPreviousRun.put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo);
LOG.info("Read from history task " + TypeConverter.toYarn(taskInfo.getTaskId()));
}
}
LOG.info("Read completed tasks from history " + completedTasksFromPreviousRun.size());
recoveredJobStartTime = jobInfo.getLaunchTime();
// recover AMInfos
List<JobHistoryParser.AMInfo> jhAmInfoList = jobInfo.getAMInfos();
if (jhAmInfoList != null) {
for (JobHistoryParser.AMInfo jhAmInfo : jhAmInfoList) {
AMInfo amInfo = MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(), jhAmInfo.getStartTime(), jhAmInfo.getContainerId(), jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(), jhAmInfo.getNodeManagerHttpPort());
amInfos.add(amInfo);
}
}
}
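The same parse-and-inspect pattern can be exercised outside the application master. The sketch below is illustrative only: it opens a history file obtained from a hypothetical command-line argument directly through a FileSystem, whereas MRAppMaster gets its stream via getPreviousJobHistoryStream, and the try-with-resources handling is a choice made for the sketch.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;

public class HistoryInspection {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path historyFile = new Path(args[0]); // path to a job history file (hypothetical input)
    try (FSDataInputStream in = FileSystem.get(conf).open(historyFile)) {
      JobHistoryParser parser = new JobHistoryParser(in);
      JobInfo jobInfo = parser.parse();
      // Walk every task and print each attempt id with its recorded status.
      for (TaskInfo taskInfo : jobInfo.getAllTasks().values()) {
        for (Map.Entry<TaskAttemptID, TaskAttemptInfo> e : taskInfo.getAllTaskAttempts().entrySet()) {
          System.out.println(e.getKey() + " -> " + e.getValue().getTaskStatus());
        }
      }
    }
  }
}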
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class TestEvents, method testTaskAttemptFinishedEvent.
/**
* Test the getters of TaskAttemptFinishedEvent and TaskAttemptFinished.
*
* @throws Exception
*/
@Test(timeout = 10000)
public void testTaskAttemptFinishedEvent() throws Exception {
JobID jid = new JobID("001", 1);
TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
Counters counters = new Counters();
TaskAttemptFinishedEvent test = new TaskAttemptFinishedEvent(taskAttemptId, TaskType.REDUCE, "TEST", 123L, "RAKNAME", "HOSTNAME", "STATUS", counters);
assertEquals(test.getAttemptId().toString(), taskAttemptId.toString());
assertEquals(test.getCounters(), counters);
assertEquals(test.getFinishTime(), 123L);
assertEquals(test.getHostname(), "HOSTNAME");
assertEquals(test.getRackName(), "RAKNAME");
assertEquals(test.getState(), "STATUS");
assertEquals(test.getTaskId(), tid);
assertEquals(test.getTaskStatus(), "TEST");
assertEquals(test.getTaskType(), TaskType.REDUCE);
}
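As a side note on the JobID -> TaskID -> TaskAttemptID chain built at the top of this test, a TaskAttemptID can also be reconstructed from its string form via TaskAttemptID.forName. The sketch below is not part of TestEvents, and the exact zero padding shown in the comment is an assumption about the toString() format.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class IdRoundTrip {
  public static void main(String[] args) {
    JobID jid = new JobID("001", 1);
    TaskID tid = new TaskID(jid, TaskType.REDUCE, 2);
    TaskAttemptID taskAttemptId = new TaskAttemptID(tid, 3);
    String s = taskAttemptId.toString(); // e.g. "attempt_001_0001_r_000002_3" (padding assumed)
    TaskAttemptID parsed = TaskAttemptID.forName(s);
    System.out.println(parsed.equals(taskAttemptId)); // true: forName reverses toString
  }
}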
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class TestRecovery, method testRecoveryTaskSuccessAllAttemptsSucceed.
@Test
public void testRecoveryTaskSuccessAllAttemptsSucceed() {
LOG.info("--- START: testRecoveryTaskSuccessAllAttemptsFail ---");
long clusterTimestamp = System.currentTimeMillis();
EventHandler mockEventHandler = mock(EventHandler.class);
MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, mockEventHandler);
TaskId taskId = recoverMapTask.getID();
JobID jobID = new JobID(Long.toString(clusterTimestamp), 1);
TaskID taskID = new TaskID(jobID, org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId());
//Mock up the TaskAttempts
Map<TaskAttemptID, TaskAttemptInfo> mockTaskAttempts = new HashMap<TaskAttemptID, TaskAttemptInfo>();
TaskAttemptID taId1 = new TaskAttemptID(taskID, 2);
TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, TaskAttemptState.SUCCEEDED);
mockTaskAttempts.put(taId1, mockTAinfo1);
TaskAttemptID taId2 = new TaskAttemptID(taskID, 1);
TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, TaskAttemptState.SUCCEEDED);
mockTaskAttempts.put(taId2, mockTAinfo2);
OutputCommitter mockCommitter = mock(OutputCommitter.class);
TaskInfo mockTaskInfo = mock(TaskInfo.class);
when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED");
when(mockTaskInfo.getTaskId()).thenReturn(taskID);
when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts);
recoverMapTask.handle(new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true));
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
verify(mockEventHandler, atLeast(1)).handle((org.apache.hadoop.yarn.event.Event) arg.capture());
Map<TaskAttemptID, TaskAttemptState> finalAttemptStates = new HashMap<TaskAttemptID, TaskAttemptState>();
finalAttemptStates.put(taId1, TaskAttemptState.SUCCEEDED);
finalAttemptStates.put(taId2, TaskAttemptState.SUCCEEDED);
List<EventType> jobHistoryEvents = new ArrayList<EventType>();
jobHistoryEvents.add(EventType.TASK_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED);
jobHistoryEvents.add(EventType.TASK_FINISHED);
recoveryChecker(recoverMapTask, TaskState.SUCCEEDED, finalAttemptStates, arg, jobHistoryEvents, 2L, 0L);
}
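The helper getMockTaskAttemptInfo referenced above is not included in this excerpt. A plausible Mockito-based shape for it is sketched below; the specific getters stubbed and the TaskAttemptState type imported are assumptions, not the actual TestRecovery implementation.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;

class MockTaskAttemptInfoFactory {
  // Builds a TaskAttemptInfo stub that reports the given attempt id and terminal state.
  static TaskAttemptInfo getMockTaskAttemptInfo(TaskAttemptID attemptId, TaskAttemptState state) {
    TaskAttemptInfo info = mock(TaskAttemptInfo.class);
    when(info.getAttemptId()).thenReturn(attemptId);          // identity of the attempt
    when(info.getTaskStatus()).thenReturn(state.toString());  // "SUCCEEDED" or "FAILED"
    return info;
  }
}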
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class TestRecovery, method testRecoveryAllFailAttempts.
@Test
public void testRecoveryAllFailAttempts() {
LOG.info("--- START: testRecoveryAllFailAttempts ---");
long clusterTimestamp = System.currentTimeMillis();
EventHandler mockEventHandler = mock(EventHandler.class);
MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, mockEventHandler);
TaskId taskId = recoverMapTask.getID();
JobID jobID = new JobID(Long.toString(clusterTimestamp), 1);
TaskID taskID = new TaskID(jobID, org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId());
//Mock up the TaskAttempts
Map<TaskAttemptID, TaskAttemptInfo> mockTaskAttempts = new HashMap<TaskAttemptID, TaskAttemptInfo>();
TaskAttemptID taId1 = new TaskAttemptID(taskID, 2);
TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, TaskAttemptState.FAILED);
mockTaskAttempts.put(taId1, mockTAinfo1);
TaskAttemptID taId2 = new TaskAttemptID(taskID, 1);
TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, TaskAttemptState.FAILED);
mockTaskAttempts.put(taId2, mockTAinfo2);
OutputCommitter mockCommitter = mock(OutputCommitter.class);
TaskInfo mockTaskInfo = mock(TaskInfo.class);
when(mockTaskInfo.getTaskStatus()).thenReturn("FAILED");
when(mockTaskInfo.getTaskId()).thenReturn(taskID);
when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts);
recoverMapTask.handle(new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true));
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
verify(mockEventHandler, atLeast(1)).handle((org.apache.hadoop.yarn.event.Event) arg.capture());
Map<TaskAttemptID, TaskAttemptState> finalAttemptStates = new HashMap<TaskAttemptID, TaskAttemptState>();
finalAttemptStates.put(taId1, TaskAttemptState.FAILED);
finalAttemptStates.put(taId2, TaskAttemptState.FAILED);
List<EventType> jobHistoryEvents = new ArrayList<EventType>();
jobHistoryEvents.add(EventType.TASK_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED);
jobHistoryEvents.add(EventType.TASK_FAILED);
recoveryChecker(recoverMapTask, TaskState.FAILED, finalAttemptStates, arg, jobHistoryEvents, 2L, 2L);
}
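recoveryChecker is likewise not shown in this excerpt. One check it plausibly performs is comparing the job-history event types captured by the ArgumentCaptor with the expected list built above; the helper below is only an illustration of that idea under those assumptions, not the actual method. In the tests above, the captured events would come from arg.getAllValues().

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapreduce.jobhistory.EventType;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
import org.apache.hadoop.yarn.event.Event;
import org.junit.Assert;

class HistoryEventCheck {
  // Keep only job-history events from the captured stream and compare their types, in order.
  static void assertHistoryEventTypes(List<Event> capturedEvents, List<EventType> expectedTypes) {
    List<EventType> actualTypes = new ArrayList<>();
    for (Event event : capturedEvents) {
      if (event instanceof JobHistoryEvent) {
        actualTypes.add(((JobHistoryEvent) event).getHistoryEvent().getEventType());
      }
    }
    Assert.assertEquals(expectedTypes, actualTypes);
  }
}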