
Example 56 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

From the class ShuffleSchedulerImpl, method getMapsForHost.

public synchronized List<TaskAttemptID> getMapsForHost(MapHost host) {
    List<TaskAttemptID> list = host.getAndClearKnownMaps();
    Iterator<TaskAttemptID> itr = list.iterator();
    List<TaskAttemptID> result = new ArrayList<TaskAttemptID>();
    int includedMaps = 0;
    int totalSize = list.size();
    // find the maps that we still need, up to the limit
    while (itr.hasNext()) {
        TaskAttemptID id = itr.next();
        if (!obsoleteMaps.contains(id) && !finishedMaps[id.getTaskID().getId()]) {
            result.add(id);
            if (++includedMaps >= MAX_MAPS_AT_ONCE) {
                break;
            }
        }
    }
    // put back the maps left after the limit
    while (itr.hasNext()) {
        TaskAttemptID id = itr.next();
        if (!obsoleteMaps.contains(id) && !finishedMaps[id.getTaskID().getId()]) {
            host.addKnownMap(id);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("assigned " + includedMaps + " of " + totalSize + " to " + host + " to " + Thread.currentThread().getName());
    }
    return result;
}
Also used: TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID), ArrayList(java.util.ArrayList)
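
The method above implements a simple batching policy: hand a fetcher at most MAX_MAPS_AT_ONCE still-needed map outputs for a host, and re-queue the eligible remainder on that host for a later pass. A minimal, self-contained sketch of the same policy, assuming hypothetical names (BoundedAssigner is not a Hadoop class):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Predicate;

public class BoundedAssigner<T> {
    private final int maxAtOnce;

    public BoundedAssigner(int maxAtOnce) {
        this.maxAtOnce = maxAtOnce;
    }

    // Hand out at most maxAtOnce items that are still needed; push the
    // remaining still-needed items back via putBack, mirroring
    // host.addKnownMap() in getMapsForHost above.
    public List<T> assign(List<T> known, Predicate<T> stillNeeded, Consumer<T> putBack) {
        List<T> result = new ArrayList<>();
        Iterator<T> itr = known.iterator();
        while (itr.hasNext() && result.size() < maxAtOnce) {
            T id = itr.next();
            if (stillNeeded.test(id)) {
                result.add(id);
            }
        }
        // re-queue the eligible remainder for a later round
        while (itr.hasNext()) {
            T id = itr.next();
            if (stillNeeded.test(id)) {
                putBack.accept(id);
            }
        }
        return result;
    }

    public static void main(String[] args) {
        List<String> requeued = new ArrayList<>();
        List<String> batch = new BoundedAssigner<String>(2).assign(
                new ArrayList<>(Arrays.asList("m0", "m1", "m2", "m3")),
                id -> !id.equals("m1"), // pretend m1 already finished
                requeued::add);
        System.out.println(batch + " fetched now, " + requeued + " re-queued");
        // prints: [m0, m2] fetched now, [m3] re-queued
    }
}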

Example 57 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

From the class TestLineRecordReader, method testStripBOM.

@Test
public void testStripBOM() throws IOException {
    // the test data contains a BOM at the start of the file
    // confirm the BOM is skipped by LineRecordReader
    String UTF8_BOM = "\uFEFF";
    URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecords = 0;
    boolean firstLine = true;
    boolean skipBOM = true;
    while (reader.nextKeyValue()) {
        if (firstLine) {
            firstLine = false;
            if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
                skipBOM = false;
            }
        }
        ++numRecords;
    }
    reader.close();
    assertTrue("BOM is not skipped", skipBOM);
}
Also used: Path(org.apache.hadoop.fs.Path), Configuration(org.apache.hadoop.conf.Configuration), TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext), URL(java.net.URL), TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), File(java.io.File), Test(org.junit.Test)
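
For context, the UTF-8 byte order mark is the byte sequence 0xEF 0xBB 0xBF, which decodes to the single character U+FEFF; LineRecordReader drops it from the first line of a file, which is what the test asserts. A minimal sketch of the same stripping done by hand (BomUtil is a hypothetical helper, not a Hadoop class):

import java.nio.charset.StandardCharsets;

public class BomUtil {
    // Drop a leading U+FEFF if present, as LineRecordReader does for the
    // first record of a file.
    public static String stripBom(String line) {
        return line.startsWith("\uFEFF") ? line.substring(1) : line;
    }

    public static void main(String[] args) {
        byte[] raw = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'h', 'i'};
        String decoded = new String(raw, StandardCharsets.UTF_8);
        System.out.println(stripBom(decoded)); // prints: hi
    }
}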

Example 58 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

From the class TestLineRecordReader, method testUncompressedInputCustomDelimiterPosValue.

@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    String inputData = "abcdefghij++kl++mno";
    Path inputFile = createInputFile(conf, inputData);
    String delimiter = "++";
    byte[] recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    int splitLength = 15;
    FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcdefghij"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    LongWritable key = reader.getCurrentKey();
    Text value = reader.getCurrentValue();
    assertEquals("Wrong length for record value", 10, value.getLength());
    assertEquals("Wrong position after record read", 0, key.get());
    // Get second record: "kl"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 2, value.getLength());
    // Key should be 12 right after "abcdefghij++"
    assertEquals("Wrong position after record read", 12, key.get());
    // Get third record: "mno"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Key should be 16 right after "abcdefghij++kl++"
    assertEquals("Wrong position after record read", 16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "abcdefghij++kl++mno"
    assertEquals("Wrong position after record read", 19, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // No record is in the second split because the second split dropped
    // the first record, which was already reported by the first split.
    assertFalse("Unexpected record returned", reader.nextKeyValue());
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abcd+efgh++ijk++mno";
    inputFile = createInputFile(conf, inputData);
    splitLength = 5;
    split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcd+efgh"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    assertEquals("Wrong position after record read", 0, key.get());
    assertEquals("Wrong length for record value", 9, value.getLength());
    // should have jumped over the delimiter, no record
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 11, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    // next split: check for duplicate or dropped records
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get second record: "ijk" first in this split
    assertEquals("Wrong position after record read", 11, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Get third record: "mno" second in this split
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong position after record read", 16, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // should be at the end of the input
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 19, key.get());
    reader.close();
    inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
    inputFile = createInputFile(conf, inputData);
    delimiter = "|+|";
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    // walking over the buffer and split sizes checks for proper processing
    // of the ambiguous bytes of the delimiter
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            // track where we are in the input data
            int keyPosition = 0;
            conf.setInt("io.file.buffer.size", bufferSize);
            split = new FileSplit(inputFile, 0, bufferSize, (String[]) null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.initialize(split, context);
            // Get the first record: "abcd|efgh" always possible
            assertTrue("Expected record got nothing", reader.nextKeyValue());
            key = reader.getCurrentKey();
            value = reader.getCurrentValue();
            assertTrue("abcd|efgh".equals(value.toString()));
            // Position should be 0 right at the start
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // Position should be 12 right after the first "|+|"
            keyPosition = 12;
            // get the next record: "ij|kl" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "ij|kl"
                assertTrue("ij|kl".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be 20 after the second "|+|"
                keyPosition = 20;
            }
            // get the third record: "mno|pqr" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "mno|pqr"
                assertTrue("mno|pqr".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be the end of the input
                keyPosition = inputData.length();
            }
            assertFalse("Unexpected record returned", reader.nextKeyValue());
            // no more records can be read we should be at the last position
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // after refresh should be empty
            key = reader.getCurrentKey();
            assertNull("Unexpected key returned", key);
            reader.close();
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path), Configuration(org.apache.hadoop.conf.Configuration), TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext), Text(org.apache.hadoop.io.Text), LongWritable(org.apache.hadoop.io.LongWritable), Test(org.junit.Test)
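
Outside of a test, a job would not construct LineRecordReader directly; it would set the delimiter through the textinputformat.record.delimiter key, which TextInputFormat reads and passes to LineRecordReader as the same byte array the test builds by hand. A minimal sketch, assuming the job's input and output paths are configured elsewhere:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CustomDelimiterJob {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // TextInputFormat hands these bytes to LineRecordReader, so records
        // are split on "++" instead of newlines.
        conf.set("textinputformat.record.delimiter", "++");
        Job job = Job.getInstance(conf, "custom-delimiter-example");
        job.setInputFormatClass(TextInputFormat.class);
    }
}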

Example 59 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

From the class TestLineRecordReader, method testSplitRecordsForFile.

private void testSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize, Path testFilePath) throws IOException {
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    }
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsNoSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsNoSplits;
    }
    reader.close();
    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsFirstSplit = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsFirstSplit;
    }
    reader.close();
    // count the records in the second split
    split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    int numRecordsRemainingSplits = 0;
    while (reader.nextKeyValue()) {
        ++numRecordsRemainingSplits;
    }
    reader.close();
    assertEquals("Unexpected number of records in split ", numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
Also used: TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext)
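
The helper checks the central contract of split handling: however a file is cut in two, the two readers together must report exactly the records of the unsplit file, none dropped and none duplicated. A hypothetical driver (not a verbatim Hadoop test) could sweep the boundary across every offset of a small resource file:

@Test
public void testSplitSweep() throws IOException {
    // Hypothetical test method: reuse the testBOM.txt resource and assert
    // the no-drop/no-duplicate invariant for every possible split point.
    URL url = getClass().getClassLoader().getResource("testBOM.txt");
    assertNotNull("Cannot find testBOM.txt", url);
    File file = new File(url.getFile());
    Path path = new Path(file.getAbsolutePath());
    for (long firstSplitLength = 1; firstSplitLength < file.length(); firstSplitLength++) {
        testSplitRecordsForFile(new Configuration(), firstSplitLength, file.length(), path);
    }
}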

Example 60 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

From the class TestPreemptableFileOutputCommitter, method testPartialOutputCleanup.

@Test
public void testPartialOutputCleanup() throws FileNotFoundException, IllegalArgumentException, IOException {
    Configuration conf = new Configuration(false);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
    TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);
    Path p = spy(new Path("/user/hadoop/out"));
    Path a = new Path("hdfs://user/hadoop/out");
    Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
    Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
    Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");
    // (p3 does not exist)
    Path p3 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_2");
    FileStatus[] fsa = new FileStatus[3];
    fsa[0] = new FileStatus();
    fsa[0].setPath(p0);
    fsa[1] = new FileStatus();
    fsa[1].setPath(p1);
    fsa[2] = new FileStatus();
    fsa[2].setPath(p2);
    final FileSystem fs = mock(FileSystem.class);
    when(fs.exists(eq(p0))).thenReturn(true);
    when(fs.exists(eq(p1))).thenReturn(true);
    when(fs.exists(eq(p2))).thenReturn(true);
    when(fs.exists(eq(p3))).thenReturn(false);
    when(fs.delete(eq(p0), eq(true))).thenReturn(true);
    when(fs.delete(eq(p1), eq(true))).thenReturn(true);
    doReturn(fs).when(p).getFileSystem(any(Configuration.class));
    when(fs.makeQualified(eq(p))).thenReturn(a);
    TaskAttemptContext context = mock(TaskAttemptContext.class);
    when(context.getTaskAttemptID()).thenReturn(tid0);
    when(context.getConfiguration()).thenReturn(conf);
    PartialFileOutputCommitter foc = new TestPFOC(p, context, fs);
    foc.cleanUpPartialOutputForTask(context);
    verify(fs).delete(eq(p0), eq(true));
    verify(fs).delete(eq(p1), eq(true));
    verify(fs, times(1)).delete(eq(p3), eq(true));
    verify(fs, never()).delete(eq(p2), eq(true));
}
Also used: Path(org.apache.hadoop.fs.Path), FileStatus(org.apache.hadoop.fs.FileStatus), Configuration(org.apache.hadoop.conf.Configuration), TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID), FileSystem(org.apache.hadoop.fs.FileSystem), TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext), Test(org.junit.Test)
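
The _temporary paths in the test are not arbitrary: each attempt directory is named by TaskAttemptID.toString() under _temporary/<application attempt>/, and the test verifies that cleanup deletes the earlier attempt numbers of the same task (0, 1, and 2 for attempt 3 of reduce task 14) while leaving task 13 alone. A quick check of the naming (AttemptPathDemo is a hypothetical demo class):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class AttemptPathDemo {
    public static void main(String[] args) {
        TaskAttemptID tid = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);
        // prints: attempt_1363718006656_0001_r_000014_3, matching the
        // directory names the test builds by hand.
        System.out.println(tid);
    }
}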

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 78
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 35
Test (org.junit.Test): 34
Configuration (org.apache.hadoop.conf.Configuration): 28
Path (org.apache.hadoop.fs.Path): 25
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 22
IOException (java.io.IOException): 19
JobID (org.apache.hadoop.mapreduce.JobID): 16
TaskID (org.apache.hadoop.mapreduce.TaskID): 15
File (java.io.File): 14
Job (org.apache.hadoop.mapreduce.Job): 14
ArrayList (java.util.ArrayList): 13
JobContext (org.apache.hadoop.mapreduce.JobContext): 12
LongWritable (org.apache.hadoop.io.LongWritable): 11
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 8
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 8
HashMap (java.util.HashMap): 7