Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache:
class TestLineRecordReader, method testUncompressedInputDefaultDelimiterPosValue.
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
  Configuration conf = new Configuration();
  String inputData = "1234567890\r\n12\r\n345";
  Path inputFile = createInputFile(conf, inputData);
  conf.setInt("io.file.buffer.size", 10);
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  LineRecordReader reader = new LineRecordReader(null);
  reader.initialize(split, context);
  LongWritable key;
  Text value;
  reader.nextKeyValue();
  key = reader.getCurrentKey();
  value = reader.getCurrentValue();
  // Get first record:"1234567890"
  assertEquals(10, value.getLength());
  assertEquals(0, key.get());
  reader.nextKeyValue();
  // Get second record:"12"
  assertEquals(2, value.getLength());
  // Key should be 12 right after "1234567890\r\n"
  assertEquals(12, key.get());
  assertFalse(reader.nextKeyValue());
  // Key should be 16 right after "1234567890\r\n12\r\n"
  assertEquals(16, key.get());
  split = new FileSplit(inputFile, 15, 4, (String[]) null);
  reader = new LineRecordReader(null);
  reader.initialize(split, context);
  // The second split dropped the first record "\n"
  reader.nextKeyValue();
  key = reader.getCurrentKey();
  value = reader.getCurrentValue();
  // Get third record:"345"
  assertEquals(3, value.getLength());
  // Key should be 16 right after "1234567890\r\n12\r\n"
  assertEquals(16, key.get());
  assertFalse(reader.nextKeyValue());
  // Key should be 19 right after "1234567890\r\n12\r\n345"
  assertEquals(19, key.get());
  inputData = "123456789\r\r\n";
  inputFile = createInputFile(conf, inputData);
  split = new FileSplit(inputFile, 0, 12, (String[]) null);
  reader = new LineRecordReader(null);
  reader.initialize(split, context);
  reader.nextKeyValue();
  key = reader.getCurrentKey();
  value = reader.getCurrentValue();
  // Get first record:"123456789"
  assertEquals(9, value.getLength());
  assertEquals(0, key.get());
  reader.nextKeyValue();
  // Get second record:""
  assertEquals(0, value.getLength());
  // Key should be 10 right after "123456789\r"
  assertEquals(10, key.get());
  assertFalse(reader.nextKeyValue());
  // Key should be 12 right after "123456789\r\r\n"
  assertEquals(12, key.get());
}
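createInputFile is a helper of the test class and is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming a local test directory and UTF-8 contents (not the project's actual implementation):

// Hypothetical sketch of a createInputFile-style helper: writes the given
// string to a local file and returns its Path. Paths and charset are assumed.
private Path createInputFile(Configuration conf, String data) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(System.getProperty("test.build.data", "/tmp"), "test.txt");
  try (OutputStream os = localFs.create(file)) {
    os.write(data.getBytes(StandardCharsets.UTF_8));
  }
  return file;
}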
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache:
class TestLineRecordReader, method readRecords.
// Use the LineRecordReader to read records from the file
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();
  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
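A hedged usage sketch for readRecords: the resource name and split size below are assumptions, chosen to show the usual pattern of reading the same file once as a single split and once as many small splits and comparing the results:

// Hypothetical usage: records must be identical regardless of split size.
@Test
public void testRecordsAcrossSplitSizes() throws IOException {
  URL url = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt");  // assumed resource
  int fileSize = (int) new File(url.getFile()).length();
  ArrayList<String> oneSplit = readRecords(url, fileSize);
  ArrayList<String> manySplits = readRecords(url, 16);
  assertEquals("Wrong number of records", oneSplit.size(), manySplits.size());
  assertEquals(oneSplit, manySplits);
}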
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache:
class StreamInputFormat, method createRecordReader.
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  String c = conf.get("stream.recordreader.class");
  if (c == null || c.indexOf("LineRecordReader") >= 0) {
    return super.createRecordReader(genericSplit, context);
  }
  // handling non-standard record reader (likely StreamXmlRecordReader)
  FileSplit split = (FileSplit) genericSplit;
  // LOG.info("getRecordReader start.....split=" + split);
  context.setStatus(split.toString());
  context.progress();
  // Open the file and seek to the start of the split
  FileSystem fs = split.getPath().getFileSystem(conf);
  FSDataInputStream in = fs.open(split.getPath());
  // Factory dispatch based on available params..
  Class readerClass;
  {
    readerClass = StreamUtil.goodClassOrNull(conf, c, null);
    if (readerClass == null) {
      throw new RuntimeException("Class not found: " + c);
    }
  }
  Constructor ctor;
  try {
    ctor = readerClass.getConstructor(new Class[] { FSDataInputStream.class, FileSplit.class, TaskAttemptContext.class, Configuration.class, FileSystem.class });
  } catch (NoSuchMethodException nsm) {
    throw new RuntimeException(nsm);
  }
  RecordReader<Text, Text> reader;
  try {
    reader = (RecordReader<Text, Text>) ctor.newInstance(new Object[] { in, split, context, conf, fs });
  } catch (Exception nsm) {
    throw new RuntimeException(nsm);
  }
  return reader;
}
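The reflection above only succeeds if the class named in stream.recordreader.class exposes a constructor taking (FSDataInputStream, FileSplit, TaskAttemptContext, Configuration, FileSystem) in exactly that order; readers such as StreamXmlRecordReader extend StreamBaseRecordReader for this. A bare-bones illustrative skeleton (class name and empty bodies are placeholders, not project code):

// Illustrative skeleton only: the five-argument constructor is what the
// factory in createRecordReader looks up via reflection.
public class MyStreamRecordReader extends RecordReader<Text, Text> {

  public MyStreamRecordReader(FSDataInputStream in, FileSplit split,
      TaskAttemptContext context, Configuration conf, FileSystem fs) throws IOException {
    // remember the stream, split boundaries, etc.
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) { }

  @Override
  public boolean nextKeyValue() throws IOException { return false; }

  @Override
  public Text getCurrentKey() { return new Text(); }

  @Override
  public Text getCurrentValue() { return new Text(); }

  @Override
  public float getProgress() { return 1.0f; }

  @Override
  public void close() throws IOException { }
}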
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache:
class TestCombineSequenceFileInputFormat, method testFormat.
@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
  Job job = Job.getInstance(conf);
  Random random = new Random();
  long seed = random.nextLong();
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with a variety of lengths
  createFiles(length, numFiles, random, job);
  TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  // create a combine split for the files
  InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got = " + splits.size());
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
    // check the split
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
    MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass());
    try {
      while (reader.nextKeyValue()) {
        IntWritable key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        assertNotNull("Value should not be null.", value);
        final int k = key.get();
        LOG.debug("read " + k);
        assertFalse("Key in multiple partitions.", bits.get(k));
        bits.set(k);
      }
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
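createFiles is a helper of the test class and is not shown here. A speculative sketch of what it plausibly does, assuming the test's workDir and localFs fields and spreading the keys 0..length-1 across the files so that the BitSet check above can account for every key exactly once; file names and value sizes are assumptions:

// Assumed sketch of a createFiles-style helper: writes numFiles SequenceFiles
// whose IntWritable keys jointly cover 0..length-1 with no duplicates.
private static void createFiles(int length, int numFiles, Random random, Job job) throws IOException {
  Configuration conf = job.getConfiguration();
  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");  // assumed naming
    SequenceFile.Writer writer = SequenceFile.createWriter(localFs, conf, file,
        IntWritable.class, BytesWritable.class);
    try {
      for (int k = i; k < length; k += numFiles) {
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        writer.append(new IntWritable(k), new BytesWritable(data));
      }
    } finally {
      writer.close();
    }
  }
}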
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache:
class TestCombineTextInputFormat, method readSplit.
private static List<Text> readSplit(InputFormat<LongWritable, Text> format, InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<LongWritable, Text, LongWritable, Text> mcontext = new MapContextImpl<LongWritable, Text, LongWritable, Text>(conf, context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  return result;
}
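A hedged driver sketch for readSplit; the input path is an assumption, and the loop simply exercises every combine split produced for a job:

// Illustrative driver (path is assumed): build combine splits for a job's
// input and read every record of every split with the helper above.
static void dumpAllSplits(Path input) throws IOException, InterruptedException {
  Job job = Job.getInstance(new Configuration());
  FileInputFormat.setInputPaths(job, input);
  CombineTextInputFormat format = new CombineTextInputFormat();
  for (InputSplit split : format.getSplits(job)) {
    List<Text> records = readSplit(format, split, job);
    System.out.println(split + " -> " + records.size() + " records");
  }
}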