Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.
Class TestLineRecordReader, method testUncompressedInputDefaultDelimiterPosValue.
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
  Configuration conf = new Configuration();
  String inputData = "1234567890\r\n12\r\n345";
  Path inputFile = createInputFile(conf, inputData);
  conf.setInt("io.file.buffer.size", 10);
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split, null);
  LongWritable key = new LongWritable();
  Text value = new Text();
  reader.next(key, value);
  // Get first record: "1234567890"
  assertEquals(10, value.getLength());
  // Position should be 12 right after "1234567890\r\n"
  assertEquals(12, reader.getPos());
  reader.next(key, value);
  // Get second record: "12"
  assertEquals(2, value.getLength());
  // Position should be 16 right after "1234567890\r\n12\r\n"
  assertEquals(16, reader.getPos());
  assertFalse(reader.next(key, value));
  split = new FileSplit(inputFile, 15, 4, (String[]) null);
  reader = new LineRecordReader(conf, split, null);
  // The second split dropped the first record "\n"
  // The position should be 16 right after "1234567890\r\n12\r\n"
  assertEquals(16, reader.getPos());
  reader.next(key, value);
  // Get third record: "345"
  assertEquals(3, value.getLength());
  // Position should be 19 right after "1234567890\r\n12\r\n345"
  assertEquals(19, reader.getPos());
  assertFalse(reader.next(key, value));
  assertEquals(19, reader.getPos());
  inputData = "123456789\r\r\n";
  inputFile = createInputFile(conf, inputData);
  split = new FileSplit(inputFile, 0, 12, (String[]) null);
  reader = new LineRecordReader(conf, split, null);
  reader.next(key, value);
  // Get first record: "123456789"
  assertEquals(9, value.getLength());
  // Position should be 10 right after "123456789\r"
  assertEquals(10, reader.getPos());
  reader.next(key, value);
  // Get second record: ""
  assertEquals(0, value.getLength());
  // Position should be 12 right after "123456789\r\r\n"
  assertEquals(12, reader.getPos());
  assertFalse(reader.next(key, value));
  assertEquals(12, reader.getPos());
}
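The createInputFile helper used above is not shown in this snippet. A minimal sketch of what such a helper might look like, assuming the local file system and a test.build.data output directory (both assumptions, not taken from this snippet):

// Hypothetical helper: writes the given string to a local file and returns
// its Path so the readers above can open it. The target directory and file
// name are assumptions.
private Path createInputFile(Configuration conf, String data) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path file = new Path(System.getProperty("test.build.data", "target/test-data"),
      "test.txt");
  OutputStreamWriter writer =
      new OutputStreamWriter(localFs.create(file), StandardCharsets.UTF_8);
  try {
    writer.write(data);
  } finally {
    writer.close();
  }
  return file;
}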
Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.
Class TestLineRecordReader, method testMultipleClose.
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  // Closing twice must be safe: the second close() must not return the
  // reader's decompressor to the CodecPool a second time.
  reader.close();
  reader.close();
  // Borrow ten decompressors and check that they are ten distinct instances.
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
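The double close() matters because LineRecordReader returns its Decompressor to the shared CodecPool when it is closed; returning the same instance twice could let later borrowers receive duplicate decompressors, which is what the final assertion guards against. A standalone sketch of the pooling behavior the assertion relies on (illustrative, not taken from the test itself):

// Standalone illustration of CodecPool reuse: a returned decompressor is
// eligible to be handed out again by a later getDecompressor() call.
Configuration conf = new Configuration();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Decompressor first = CodecPool.getDecompressor(codec);
CodecPool.returnDecompressor(first);
Decompressor second = CodecPool.getDecompressor(codec);
// "second" is typically the same pooled instance as "first".
CodecPool.returnDecompressor(second);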
Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.
Class TestLineRecordReader, method testLargeSplitRecordForFile.
private void testLargeSplitRecordForFile(Configuration conf, long firstSplitLength,
    long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  assertTrue("unexpected firstSplitLength:" + firstSplitLength,
      testFileSize < firstSplitLength);
  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }
  // read the data without splitting to count the records
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  LongWritable key = new LongWritable();
  Text value = new Text();
  int numRecordsNoSplits = 0;
  while (reader.next(key, value)) {
    ++numRecordsNoSplits;
  }
  reader.close();
  // count the records in the first split
  split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
  reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  int numRecordsFirstSplit = 0;
  while (reader.next(key, value)) {
    ++numRecordsFirstSplit;
  }
  reader.close();
  assertEquals("Unexpected number of records in split", numRecordsNoSplits,
      numRecordsFirstSplit);
}
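Callers of testLargeSplitRecordForFile pass a split length that exceeds the file size (note the testFileSize < firstSplitLength assertion), so the single oversized split must still yield every record exactly once. A hypothetical caller, where the method name, file contents, delimiter, and split length are illustrative rather than taken from the Hadoop source, might look like:

@Test
public void testCustomDelimiterWithOversizedSplit() throws IOException {
  // Hypothetical example: a multi-byte custom delimiter and a split length
  // (100) deliberately larger than the 15-byte file.
  Configuration conf = new Configuration();
  conf.set("textinputformat.record.delimiter", "|+|");
  String inputData = "abc|+|def|+|ghi";
  Path inputFile = createInputFile(conf, inputData);
  testLargeSplitRecordForFile(conf, 100, inputData.length(), inputFile);
}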
Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.
Class TestLineRecordReader, method testSplitRecordsForFile.
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength,
    long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  assertTrue("unexpected test data at " + testFilePath,
      testFileSize > firstSplitLength);
  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }
  // read the data without splitting to count the records
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  LongWritable key = new LongWritable();
  Text value = new Text();
  int numRecordsNoSplits = 0;
  while (reader.next(key, value)) {
    ++numRecordsNoSplits;
  }
  reader.close();
  // count the records in the first split
  split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
  reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  int numRecordsFirstSplit = 0;
  while (reader.next(key, value)) {
    ++numRecordsFirstSplit;
  }
  reader.close();
  // count the records in the second split
  split = new FileSplit(testFilePath, firstSplitLength,
      testFileSize - firstSplitLength, (String[]) null);
  reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  int numRecordsRemainingSplits = 0;
  while (reader.next(key, value)) {
    ++numRecordsRemainingSplits;
  }
  reader.close();
  assertEquals("Unexpected number of records in split", numRecordsNoSplits,
      numRecordsFirstSplit + numRecordsRemainingSplits);
}
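In contrast to the oversized-split helper above, testSplitRecordsForFile cuts the file into a first split and a remainder and checks that the total record count is unchanged, no matter where the cut falls. A hypothetical caller (names and data are illustrative) could sweep the split point across the file so that cuts inside a "\r\n" delimiter are also exercised:

@Test
public void testRecordCountIsSplitInvariant() throws IOException {
  // Hypothetical example: try every possible first-split length, including
  // ones that cut a "\r\n" delimiter in half.
  String inputData = "ab\r\ncd\r\nef\r\ngh";
  Configuration conf = new Configuration();
  Path inputFile = createInputFile(conf, inputData);
  for (long splitLength = 1; splitLength < inputData.length(); splitLength++) {
    testSplitRecordsForFile(conf, splitLength, inputData.length(), inputFile);
  }
}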
Use of org.apache.hadoop.io.LongWritable in project hadoop by apache.
Class DFSCIOTest, method createControlFile.
private static void createControlFile(FileSystem fs,
                                       int fileSize, // in MB
                                       int nrFiles) throws IOException {
  LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");
  fs.delete(CONTROL_DIR, true);
  for (int i = 0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
          Text.class, LongWritable.class, CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(fileSize));
    } catch (Exception e) {
      throw new IOException(e.getLocalizedMessage());
    } finally {
      if (writer != null)
        writer.close();
      writer = null;
    }
  }
  LOG.info("created control files for: " + nrFiles + " files");
}
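Each control file therefore contains a single (Text, LongWritable) entry: the logical file name and its size in MB. A small sketch of reading one back with SequenceFile.Reader, assuming the same fs, fsConfig, CONTROL_DIR, and getFileName members used above:

// Sketch: read back the first control file written by createControlFile.
Path controlFile = new Path(CONTROL_DIR, "in_file_" + getFileName(0));
SequenceFile.Reader reader = new SequenceFile.Reader(fs, controlFile, fsConfig);
try {
  Text name = new Text();
  LongWritable sizeInMB = new LongWritable();
  while (reader.next(name, sizeInMB)) {
    LOG.info("control entry: " + name + " -> " + sizeInMB.get() + " MB");
  }
} finally {
  reader.close();
}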