Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: class ShuffleSchedulerImpl, method getMapsForHost.
public synchronized List<TaskAttemptID> getMapsForHost(MapHost host) {
  List<TaskAttemptID> list = host.getAndClearKnownMaps();
  Iterator<TaskAttemptID> itr = list.iterator();
  List<TaskAttemptID> result = new ArrayList<TaskAttemptID>();
  int includedMaps = 0;
  int totalSize = list.size();
  // find the maps that we still need, up to the limit
  while (itr.hasNext()) {
    TaskAttemptID id = itr.next();
    if (!obsoleteMaps.contains(id) && !finishedMaps[id.getTaskID().getId()]) {
      result.add(id);
      if (++includedMaps >= MAX_MAPS_AT_ONCE) {
        break;
      }
    }
  }
  // put back the maps left after the limit
  while (itr.hasNext()) {
    TaskAttemptID id = itr.next();
    if (!obsoleteMaps.contains(id) && !finishedMaps[id.getTaskID().getId()]) {
      host.addKnownMap(id);
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("assigned " + includedMaps + " of " + totalSize + " to " + host + " to " + Thread.currentThread().getName());
  }
  return result;
}
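The pattern above (hand out at most MAX_MAPS_AT_ONCE still-needed map outputs and leave the rest pending for a later call) can be shown with a small standalone sketch. The class name, the plain String ids, and the hard-coded limit are assumptions made purely for illustration and are not part of the real scheduler:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class HostAssignmentSketch {
  // assumed small limit for the sketch, standing in for MAX_MAPS_AT_ONCE above
  private static final int MAX_MAPS_AT_ONCE = 2;

  // Take up to MAX_MAPS_AT_ONCE ids that are not yet finished; ids beyond the
  // limit stay in "pending" so a later call can pick them up, mirroring the
  // "put back the maps left after the limit" loop above.
  static List<String> assign(List<String> pending, List<String> finished) {
    List<String> result = new ArrayList<>();
    Iterator<String> itr = pending.iterator();
    while (itr.hasNext()) {
      String id = itr.next();
      if (!finished.contains(id)) {
        result.add(id);
        itr.remove();
        if (result.size() >= MAX_MAPS_AT_ONCE) {
          break;
        }
      } else {
        // finished ids are simply dropped from the pending list
        itr.remove();
      }
    }
    return result;
  }

  public static void main(String[] args) {
    List<String> pending = new ArrayList<>(List.of("m1", "m2", "m3", "m4"));
    System.out.println(assign(pending, List.of("m2"))); // [m1, m3]
    System.out.println(pending);                        // [m4]
  }
}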
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: class TestLineRecordReader, method testStripBOM.
@Test
public void testStripBOM() throws IOException {
  // the test data contains a BOM at the start of the file
  // confirm the BOM is skipped by LineRecordReader
  String UTF8_BOM = "\uFEFF";
  URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt");
  assertNotNull("Cannot find testBOM.txt", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  int numRecords = 0;
  boolean firstLine = true;
  boolean skipBOM = true;
  while (reader.nextKeyValue()) {
    if (firstLine) {
      firstLine = false;
      if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) {
        skipBOM = false;
      }
    }
    ++numRecords;
  }
  reader.close();
  assertTrue("BOM is not skipped", skipBOM);
}
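The contents of testBOM.txt are not shown in this excerpt; a minimal, hypothetical way to produce such a BOM-prefixed file (the file name and line contents are illustrative only) is:

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

public class WriteBomFileSketch {
  public static void main(String[] args) throws IOException {
    // "\uFEFF" encodes to the three-byte UTF-8 BOM 0xEF 0xBB 0xBF at the start of the file
    try (Writer w = new OutputStreamWriter(new FileOutputStream("testBOM.txt"), StandardCharsets.UTF_8)) {
      w.write("\uFEFF");
      w.write("first line after the BOM\n");
      w.write("second line\n");
    }
  }
}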
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: class TestLineRecordReader, method testUncompressedInputCustomDelimiterPosValue.
@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 10);
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  String inputData = "abcdefghij++kl++mno";
  Path inputFile = createInputFile(conf, inputData);
  String delimiter = "++";
  byte[] recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  int splitLength = 15;
  FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  // Get first record: "abcdefghij"
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  LongWritable key = reader.getCurrentKey();
  Text value = reader.getCurrentValue();
  assertEquals("Wrong length for record value", 10, value.getLength());
  assertEquals("Wrong position after record read", 0, key.get());
  // Get second record: "kl"
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  assertEquals("Wrong length for record value", 2, value.getLength());
  // Key should be 12 right after "abcdefghij++"
  assertEquals("Wrong position after record read", 12, key.get());
  // Get third record: "mno"
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  assertEquals("Wrong length for record value", 3, value.getLength());
  // Key should be 16 right after "abcdefghij++kl++"
  assertEquals("Wrong position after record read", 16, key.get());
  assertFalse(reader.nextKeyValue());
  // Key should be 19 right after "abcdefghij++kl++mno"
  assertEquals("Wrong position after record read", 19, key.get());
  // after refresh should be empty
  key = reader.getCurrentKey();
  assertNull("Unexpected key returned", key);
  reader.close();
  split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
  reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  // No record is in the second split because the second split dropped
  // the first record, which was already reported by the first split.
  assertFalse("Unexpected record returned", reader.nextKeyValue());
  key = reader.getCurrentKey();
  assertNull("Unexpected key returned", key);
  reader.close();
  // multi char delimiter with starting part of the delimiter in the data
  inputData = "abcd+efgh++ijk++mno";
  inputFile = createInputFile(conf, inputData);
  splitLength = 5;
  split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
  reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  // Get first record: "abcd+efgh"
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  key = reader.getCurrentKey();
  value = reader.getCurrentValue();
  assertEquals("Wrong position after record read", 0, key.get());
  assertEquals("Wrong length for record value", 9, value.getLength());
  // should have jumped over the delimiter, no record
  assertFalse(reader.nextKeyValue());
  assertEquals("Wrong position after record read", 11, key.get());
  // after refresh should be empty
  key = reader.getCurrentKey();
  assertNull("Unexpected key returned", key);
  reader.close();
  // next split: check for duplicate or dropped records
  split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
  reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  key = reader.getCurrentKey();
  value = reader.getCurrentValue();
  // Get second record: "ijk" first in this split
  assertEquals("Wrong position after record read", 11, key.get());
  assertEquals("Wrong length for record value", 3, value.getLength());
  // Get third record: "mno" second in this split
  assertTrue("Expected record got nothing", reader.nextKeyValue());
  assertEquals("Wrong position after record read", 16, key.get());
  assertEquals("Wrong length for record value", 3, value.getLength());
  // should be at the end of the input
  assertFalse(reader.nextKeyValue());
  assertEquals("Wrong position after record read", 19, key.get());
  reader.close();
  inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
  inputFile = createInputFile(conf, inputData);
  delimiter = "|+|";
  recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  // walk over the buffer and split sizes to test for proper processing
  // of the ambiguous bytes of the delimiter
  for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
    for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
      // track where we are in the inputdata
      int keyPosition = 0;
      conf.setInt("io.file.buffer.size", bufferSize);
      split = new FileSplit(inputFile, 0, bufferSize, (String[]) null);
      reader = new LineRecordReader(recordDelimiterBytes);
      reader.initialize(split, context);
      // Get the first record: "abcd|efgh" always possible
      assertTrue("Expected record got nothing", reader.nextKeyValue());
      key = reader.getCurrentKey();
      value = reader.getCurrentValue();
      assertTrue("abcd|efgh".equals(value.toString()));
      // Position should be 0 right at the start
      assertEquals("Wrong position after record read", keyPosition, key.get());
      // Position should be 12 right after the first "|+|"
      keyPosition = 12;
      // get the next record: "ij|kl" if the split/buffer allows it
      if (reader.nextKeyValue()) {
        // check the record info: "ij|kl"
        assertTrue("ij|kl".equals(value.toString()));
        assertEquals("Wrong position after record read", keyPosition, key.get());
        // Position should be 20 after the second "|+|"
        keyPosition = 20;
      }
      // get the third record: "mno|pqr" if the split/buffer allows it
      if (reader.nextKeyValue()) {
        // check the record info: "mno|pqr"
        assertTrue("mno|pqr".equals(value.toString()));
        assertEquals("Wrong position after record read", keyPosition, key.get());
        // Position should be the end of the input
        keyPosition = inputData.length();
      }
      assertFalse("Unexpected record returned", reader.nextKeyValue());
      // no more records can be read we should be at the last position
      assertEquals("Wrong position after record read", keyPosition, key.get());
      // after refresh should be empty
      key = reader.getCurrentKey();
      assertNull("Unexpected key returned", key);
      reader.close();
    }
  }
}
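Outside of tests, the same custom-delimiter behavior is normally enabled through the textinputformat.record.delimiter property (the key read in the helper below) rather than by constructing LineRecordReader directly. A minimal, hypothetical job setup, where the class name, job name, and argument handling are illustrative, might look like:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CustomDelimiterJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // split records on "++" instead of newlines; LineRecordReader picks this up
    conf.set("textinputformat.record.delimiter", "++");
    Job job = Job.getInstance(conf, "custom-delimiter-job");
    job.setJarByClass(CustomDelimiterJobSketch.class);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // the default identity mapper/reducer is enough to demonstrate the delimiter
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}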
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: class TestLineRecordReader, method testSplitRecordsForFile.
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);
  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // read the data without splitting to count the records
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  int numRecordsNoSplits = 0;
  while (reader.nextKeyValue()) {
    ++numRecordsNoSplits;
  }
  reader.close();
  // count the records in the first split
  split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
  reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  int numRecordsFirstSplit = 0;
  while (reader.nextKeyValue()) {
    ++numRecordsFirstSplit;
  }
  reader.close();
  // count the records in the second split
  split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (String[]) null);
  reader = new LineRecordReader(recordDelimiterBytes);
  reader.initialize(split, context);
  int numRecordsRemainingSplits = 0;
  while (reader.nextKeyValue()) {
    ++numRecordsRemainingSplits;
  }
  reader.close();
  assertEquals("Unexpected number of records in split ", numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
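For context, a hypothetical test that drives this helper might look as follows; the resource name and the half-file split point are assumptions made for illustration and are not taken from the snippet above:

@Test
public void testSplitRecords() throws IOException {
  // resource name is illustrative; any multi-record text file on the test classpath works
  URL testFileUrl = getClass().getClassLoader().getResource("testlinereader.txt");
  assertNotNull("Cannot find testlinereader.txt", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  // split the file roughly in half and verify no record is lost or duplicated
  testSplitRecordsForFile(conf, testFileSize / 2, testFileSize, testFilePath);
}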
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: class TestPreemptableFileOutputCommitter, method testPartialOutputCleanup.
@Test
public void testPartialOutputCleanup() throws FileNotFoundException, IllegalArgumentException, IOException {
  Configuration conf = new Configuration(false);
  conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
  TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);
  Path p = spy(new Path("/user/hadoop/out"));
  Path a = new Path("hdfs://user/hadoop/out");
  Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
  Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
  Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");
  // (p3 does not exist)
  Path p3 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_2");
  FileStatus[] fsa = new FileStatus[3];
  fsa[0] = new FileStatus();
  fsa[0].setPath(p0);
  fsa[1] = new FileStatus();
  fsa[1].setPath(p1);
  fsa[2] = new FileStatus();
  fsa[2].setPath(p2);
  final FileSystem fs = mock(FileSystem.class);
  when(fs.exists(eq(p0))).thenReturn(true);
  when(fs.exists(eq(p1))).thenReturn(true);
  when(fs.exists(eq(p2))).thenReturn(true);
  when(fs.exists(eq(p3))).thenReturn(false);
  when(fs.delete(eq(p0), eq(true))).thenReturn(true);
  when(fs.delete(eq(p1), eq(true))).thenReturn(true);
  doReturn(fs).when(p).getFileSystem(any(Configuration.class));
  when(fs.makeQualified(eq(p))).thenReturn(a);
  TaskAttemptContext context = mock(TaskAttemptContext.class);
  when(context.getTaskAttemptID()).thenReturn(tid0);
  when(context.getConfiguration()).thenReturn(conf);
  PartialFileOutputCommitter foc = new TestPFOC(p, context, fs);
  foc.cleanUpPartialOutputForTask(context);
  verify(fs).delete(eq(p0), eq(true));
  verify(fs).delete(eq(p1), eq(true));
  verify(fs, times(1)).delete(eq(p3), eq(true));
  verify(fs, never()).delete(eq(p2), eq(true));
}
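The TestPFOC helper constructed above is not included in this excerpt. A plausible reconstruction based only on how it is used here, where the constructor shape and the overridden file-system hook are assumptions, would be:

// Sketch only: hands the mocked FileSystem back to the committer so the test
// never resolves a real file system for the output path.
private static class TestPFOC extends PartialFileOutputCommitter {
  private final FileSystem fs;

  TestPFOC(Path outputPath, TaskAttemptContext ctxt, FileSystem fs) throws IOException {
    super(outputPath, ctxt);
    this.fs = fs;
  }

  @Override
  FileSystem fsFor(Path p, Configuration conf) {
    return fs;
  }
}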