
Example 6 with TaskAttemptID

Use of org.apache.hadoop.mapred.TaskAttemptID in project flink by apache.

The class HadoopOutputFormatBase, method open:

/**
 * Creates the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the record writer could not be opened.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        // Left-pad the 1-based task number with zeros to six digits, e.g. 7 -> "000007",
        // and parse the result into an attempt ID of the form "attempt__0000_r_000007_0".
        TaskAttemptID taskAttemptID = TaskAttemptID.forName(
                "attempt__0000_r_" + String.format("%06d", taskNumber + 1) + "_0");
        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // the same values under the property names introduced in Hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);
        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.outputCommitter.setupJob(jobContext);
        // The FileSystem argument is unused by the common FileOutputFormat implementations, hence null.
        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), IOException (java.io.IOException), JobContext (org.apache.hadoop.mapred.JobContext), HadoopDummyProgressable (org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyProgressable), JobID (org.apache.hadoop.mapred.JobID)
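
For reference, a minimal standalone sketch (not part of the Flink source; the class name and main() are illustrative) of what the padded attempt string looks like and how TaskAttemptID.forName() parses it back. The empty segment between the two underscores is the blank jtIdentifier:

import org.apache.hadoop.mapred.TaskAttemptID;

public class AttemptIdDemo {
    public static void main(String[] args) {
        int taskNumber = 6; // the 0-based parallel instance index passed to open()
        // "%06d" zero-pads the 1-based task number, e.g. 7 -> "000007"
        String name = "attempt__0000_r_" + String.format("%06d", taskNumber + 1) + "_0";
        TaskAttemptID id = TaskAttemptID.forName(name);
        System.out.println(id); // prints: attempt__0000_r_000007_0
    }
}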

Example 7 with TaskAttemptID

Use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

The class TestEventFetcher, method getMockedCompletionEventsUpdate:

private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(int startIdx, int numEvents) {
    ArrayList<TaskCompletionEvent> tceList = new ArrayList<TaskCompletionEvent>(numEvents);
    for (int i = 0; i < numEvents; ++i) {
        int eventIdx = startIdx + i;
        TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx, new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0), eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED, "http://somehost:8888");
        tceList.add(tce);
    }
    return new MapTaskCompletionEventsUpdate(tceList.toArray(new TaskCompletionEvent[0]), false);
}
Also used: TaskCompletionEvent (org.apache.hadoop.mapred.TaskCompletionEvent), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), MapTaskCompletionEventsUpdate (org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate), ArrayList (java.util.ArrayList)
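
A minimal sketch (the class name and main() are illustrative, assuming the Hadoop 2.x mapred API) of how the five-argument constructor used above maps onto the printed attempt ID:

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class CompletionEventIdDemo {
    public static void main(String[] args) {
        // (jtIdentifier, jobId, taskType, taskId, attemptId)
        TaskAttemptID id = new TaskAttemptID("12345", 1, TaskType.MAP, 3, 0);
        System.out.println(id); // prints: attempt_12345_0001_m_000003_0
    }
}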

Example 8 with TaskAttemptID

Use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

The class TestEventFetcher, method testConsecutiveFetch:

@Test
public void testConsecutiveFetch() throws IOException, InterruptedException {
    final int MAX_EVENTS_TO_FETCH = 100;
    TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 1);
    TaskUmbilicalProtocol umbilical = mock(TaskUmbilicalProtocol.class);
    when(umbilical.getMapCompletionEvents(any(JobID.class), anyInt(), anyInt(), any(TaskAttemptID.class))).thenReturn(getMockedCompletionEventsUpdate(0, 0));
    when(umbilical.getMapCompletionEvents(any(JobID.class), eq(0), eq(MAX_EVENTS_TO_FETCH), eq(tid))).thenReturn(getMockedCompletionEventsUpdate(0, MAX_EVENTS_TO_FETCH));
    when(umbilical.getMapCompletionEvents(any(JobID.class), eq(MAX_EVENTS_TO_FETCH), eq(MAX_EVENTS_TO_FETCH), eq(tid))).thenReturn(getMockedCompletionEventsUpdate(MAX_EVENTS_TO_FETCH, MAX_EVENTS_TO_FETCH));
    when(umbilical.getMapCompletionEvents(any(JobID.class), eq(MAX_EVENTS_TO_FETCH * 2), eq(MAX_EVENTS_TO_FETCH), eq(tid))).thenReturn(getMockedCompletionEventsUpdate(MAX_EVENTS_TO_FETCH * 2, 3));
    @SuppressWarnings("unchecked") ShuffleScheduler<String, String> scheduler = mock(ShuffleScheduler.class);
    ExceptionReporter reporter = mock(ExceptionReporter.class);
    EventFetcherForTest<String, String> ef = new EventFetcherForTest<String, String>(tid, umbilical, scheduler, reporter, MAX_EVENTS_TO_FETCH);
    ef.getMapCompletionEvents();
    verify(reporter, never()).reportException(any(Throwable.class));
    InOrder inOrder = inOrder(umbilical);
    inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class), eq(0), eq(MAX_EVENTS_TO_FETCH), eq(tid));
    inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class), eq(MAX_EVENTS_TO_FETCH), eq(MAX_EVENTS_TO_FETCH), eq(tid));
    inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class), eq(MAX_EVENTS_TO_FETCH * 2), eq(MAX_EVENTS_TO_FETCH), eq(tid));
    verify(scheduler, times(MAX_EVENTS_TO_FETCH * 2 + 3)).resolve(any(TaskCompletionEvent.class));
}
Also used: InOrder (org.mockito.InOrder), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), Mockito.anyString (org.mockito.Mockito.anyString), TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol), TaskCompletionEvent (org.apache.hadoop.mapred.TaskCompletionEvent), JobID (org.apache.hadoop.mapred.JobID), Test (org.junit.Test)
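
The mocks above encode a paging contract: the fetcher requests up to MAX_EVENTS_TO_FETCH events per call, advances its offset by the number returned, and stops once a page comes back short (3 < 100 on the third call). A hypothetical sketch of that loop in isolation; EventSource and drain() are illustrative names, not the EventFetcher API:

interface EventSource {
    // returns the number of completion events delivered for this page
    int fetch(int fromIndex, int maxFetch);
}

class PagingLoop {
    static int drain(EventSource source, int maxFetch) {
        int from = 0;
        int got;
        do {
            got = source.fetch(from, maxFetch);
            from += got; // the next request starts where this page ended
        } while (got == maxFetch); // a short page means no more events
        return from; // total events seen; 2 * 100 + 3 = 203 in the test above
    }
}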

Example 9 with TaskAttemptID

Use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

The class TestHistoryViewerPrinter, method addTaskAttemptInfo:

private static void addTaskAttemptInfo(JobHistoryParser.TaskInfo task, int id) {
    JobHistoryParser.TaskAttemptInfo attempt = new JobHistoryParser.TaskAttemptInfo();
    attempt.attemptId = new TaskAttemptID(TaskID.downgrade(task.getTaskId()), id);
    attempt.startTime = task.getStartTime();
    attempt.finishTime = task.getFinishTime();
    attempt.shuffleFinishTime = task.getFinishTime();
    attempt.sortFinishTime = task.getFinishTime();
    attempt.mapFinishTime = task.getFinishTime();
    attempt.status = task.getTaskStatus();
    attempt.taskType = task.getTaskType();
    attempt.trackerName = "localhost";
    attempt.httpPort = 1234;
    attempt.hostname = "localhost";
    task.attemptsMap.put(attempt.getAttemptId(), attempt);
}
Also used: TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID)
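
A minimal standalone sketch (the class name and main() are illustrative) of the TaskID.downgrade() step used above, which converts a new-API (mapreduce) TaskID into the old-API (mapred) TaskID that the mapred TaskAttemptID constructor expects:

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskType;

public class DowngradeDemo {
    public static void main(String[] args) {
        org.apache.hadoop.mapreduce.TaskID newApiId =
                new org.apache.hadoop.mapreduce.TaskID(new JobID("job", 1), TaskType.MAP, 0);
        // downgrade() wraps the new-API ID so old-API code can use it
        TaskAttemptID attemptId = new TaskAttemptID(TaskID.downgrade(newApiId), 0);
        System.out.println(attemptId); // prints: attempt_job_0001_m_000000_0
    }
}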

Example 10 with TaskAttemptID

Use of org.apache.hadoop.mapred.TaskAttemptID in project hadoop by apache.

The class TestShuffleScheduler, method TestAggregatedTransferRate:

@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(10);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
    // needed for mock with generic
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
    TaskAttemptID attemptID0 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
    //adding the 1st interval, 40MB from 60s to 100s
    long bytes = (long) 40 * 1024 * 1024;
    scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
    Assert.assertEquals(copyMessage(1, 1, 1), progress.toString());
    TaskAttemptID attemptID1 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
    //adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
    Assert.assertEquals(copyMessage(2, 1, 1), progress.toString());
    TaskAttemptID attemptID2 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 2), 2);
    //adding the 3rd interval overlapping with the 1st and the 2nd interval
    //110MB from 25s to 80s
    bytes = (long) 110 * 1024 * 1024;
    scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
    Assert.assertEquals(copyMessage(3, 2, 2), progress.toString());
    TaskAttemptID attemptID3 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 3), 3);
    //adding the 4th interval just after the 1st interval, 100MB from 100s to 300s
    bytes = (long) 100 * 1024 * 1024;
    scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
    Assert.assertEquals(copyMessage(4, 0.5, 1), progress.toString());
    TaskAttemptID attemptID4 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 4), 4);
    //adding the 5th interval after the 4th, 50MB from 350s to 400s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
    Assert.assertEquals(copyMessage(5, 1, 1), progress.toString());
    TaskAttemptID attemptID5 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 5), 5);
    //adding the 6th interval after the 5th, 50MB from 450s to 500s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
    Assert.assertEquals(copyMessage(6, 1, 1), progress.toString());
    TaskAttemptID attemptID6 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 6), 6);
    //adding the 7th interval between the 4th and the 5th intervals, 20MB from 320s to 340s
    bytes = (long) 20 * 1024 * 1024;
    scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
    Assert.assertEquals(copyMessage(7, 1, 1), progress.toString());
    TaskAttemptID attemptID7 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 7), 7);
    //adding the 8th interval overlapping with the 4th, 5th, and 7th, 30MB from 290s to 350s
    bytes = (long) 30 * 1024 * 1024;
    scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
    Assert.assertEquals(copyMessage(8, 0.5, 1), progress.toString());
    TaskAttemptID attemptID8 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 8), 8);
    //adding the 9th interval overlapping with 5th and 6th, 50MB from 400s to 450s
    bytes = (long) 50 * 1024 * 1024;
    scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
    Assert.assertEquals(copyMessage(9, 1, 1), progress.toString());
    TaskAttemptID attemptID9 = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 9), 9);
    //adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
    bytes = (long) 500 * 1024 * 1024;
    scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
    Assert.assertEquals(copyMessage(10, 1, 2), progress.toString());
}
Also used: Task (org.apache.hadoop.mapred.Task), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin), Counter (org.apache.hadoop.mapred.Counters.Counter), FileSystem (org.apache.hadoop.fs.FileSystem), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), MapOutputFile (org.apache.hadoop.mapred.MapOutputFile), Progress (org.apache.hadoop.util.Progress), Reporter (org.apache.hadoop.mapred.Reporter), TaskStatus (org.apache.hadoop.mapred.TaskStatus), CombineOutputCollector (org.apache.hadoop.mapred.Task.CombineOutputCollector), TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol), LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
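
The second rate in each assertion is the aggregated transfer rate: total bytes copied divided by the wall-clock time covered by the union of the copy intervals, so overlapping copies are not double-counted. An illustrative sketch of that computation (not the ShuffleSchedulerImpl code; the class and method names are made up):

public class AggregatedRateDemo {

    // Milliseconds covered by the union of [start, end] copy intervals.
    static long unionMillis(long[][] intervals) {
        java.util.Arrays.sort(intervals, (a, b) -> Long.compare(a[0], b[0]));
        long covered = 0;
        long curStart = Long.MIN_VALUE;
        long curEnd = Long.MIN_VALUE;
        for (long[] iv : intervals) {
            if (iv[0] > curEnd) { // disjoint: close the previous run
                if (curEnd > curStart) {
                    covered += curEnd - curStart;
                }
                curStart = iv[0];
                curEnd = iv[1];
            } else { // overlapping: extend the current run
                curEnd = Math.max(curEnd, iv[1]);
            }
        }
        if (curEnd > curStart) {
            covered += curEnd - curStart;
        }
        return covered;
    }

    public static void main(String[] args) {
        // The first two copies from the test: 40MB over [60s, 100s] and 50MB over [0s, 50s]
        long[][] copies = { { 60000, 100000 }, { 0, 50000 } };
        long bytes = (40L + 50L) * 1024 * 1024;
        double mbps = (bytes / 1024.0 / 1024.0) / (unionMillis(copies) / 1000.0);
        System.out.println(mbps); // 1.0 MB/s: 90MB over 90s of covered time, as asserted above
    }
}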

Aggregations

TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 17
File (java.io.File): 6
Test (org.junit.Test): 6
JobConf (org.apache.hadoop.mapred.JobConf): 5
IOException (java.io.IOException): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
Path (org.apache.hadoop.fs.Path): 4
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 3
JobID (org.apache.hadoop.mapred.JobID): 3
TaskStatus (org.apache.hadoop.mapred.TaskStatus): 3
TaskUmbilicalProtocol (org.apache.hadoop.mapred.TaskUmbilicalProtocol): 3
JobID (org.apache.hadoop.mapreduce.JobID): 3
Progress (org.apache.hadoop.util.Progress): 3
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 2
AuthenticationTestContext (co.cask.cdap.security.auth.context.AuthenticationTestContext): 2
NoOpAuthorizer (co.cask.cdap.security.spi.authorization.NoOpAuthorizer): 2
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 2
CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath): 2
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 2