Search in sources :

Example 21 with LongWritable

use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

the class TestLineRecordReader method testUncompressedInputDefaultDelimiterPosValue.

public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split, null);
    LongWritable key = new LongWritable();
    Text value = new Text();, value);
    // Get first record:"1234567890"
    assertEquals(10, value.getLength());
    // Position should be 12 right after "1234567890\r\n"
    assertEquals(12, reader.getPos());, value);
    // Get second record:"12"
    assertEquals(2, value.getLength());
    // Position should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, reader.getPos());
    assertFalse(, value));
    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(conf, split, null);
    // The second split dropped the first record "\n"
    // The position should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, reader.getPos());, value);
    // Get third record:"345"
    assertEquals(3, value.getLength());
    // Position should be 19 right after "1234567890\r\n12\r\n345"
    assertEquals(19, reader.getPos());
    assertFalse(, value));
    assertEquals(19, reader.getPos());
    inputData = "123456789\r\r\n";
    inputFile = createInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(conf, split, null);, value);
    // Get first record:"123456789"
    assertEquals(9, value.getLength());
    // Position should be 10 right after "123456789\r"
    assertEquals(10, reader.getPos());, value);
    // Get second record:""
    assertEquals(0, value.getLength());
    // Position should be 12 right after "123456789\r\r\n"
    assertEquals(12, reader.getPos());
    assertFalse(, value));
    assertEquals(12, reader.getPos());
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)

Example 22 with LongWritable

use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

the class TestLineRecordReader method testMultipleClose.

public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split);
    LongWritable key = new LongWritable();
    Text value = new Text();
    //noinspection StatementWithEmptyBody
    while (, value)) ;
    BZip2Codec codec = new BZip2Codec();
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
    assertEquals(10, decompressors.size());
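Also used : Path(org.apache.hadoop.fs.Path) Decompressor(org.apache.hadoop.io.compress.Decompressor) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) URL(java.net.URL) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)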

Example 23 with LongWritable

use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

the class TestLineRecordReader method testLargeSplitRecordForFile.

private void testLargeSplitRecordForFile(Configuration conf, long firstSplitLength, long testFileSize, Path testFilePath) throws IOException {
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected firstSplitLength:" + firstSplitLength, testFileSize < firstSplitLength);
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes);
    LongWritable key = new LongWritable();
    Text value = new Text();
    int numRecordsNoSplits = 0;
    while (, value)) {
    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
    reader = new LineRecordReader(conf, split, recordDelimiterBytes);
    int numRecordsFirstSplit = 0;
    while (, value)) {
    assertEquals("Unexpected number of records in split", numRecordsNoSplits, numRecordsFirstSplit);
Also used : Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable)

Example 24 with LongWritable

use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

the class TestLineRecordReader method testSplitRecordsForFile.

private void testSplitRecordsForFile(Configuration conf, long firstSplitLength, long testFileSize, Path testFilePath) throws IOException {
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);
    String delimiter = conf.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split, recordDelimiterBytes);
    LongWritable key = new LongWritable();
    Text value = new Text();
    int numRecordsNoSplits = 0;
    while (, value)) {
    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[]) null);
    reader = new LineRecordReader(conf, split, recordDelimiterBytes);
    int numRecordsFirstSplit = 0;
    while (, value)) {
    // count the records in the second split
    split = new FileSplit(testFilePath, firstSplitLength, testFileSize - firstSplitLength, (String[]) null);
    reader = new LineRecordReader(conf, split, recordDelimiterBytes);
    int numRecordsRemainingSplits = 0;
    while (, value)) {
    assertEquals("Unexpected number of records in split", numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
Also used : Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable)

Example 25 with LongWritable

use of org.apache.hadoop.io.LongWritable in project hadoop by apache.

the class DFSCIOTest method createControlFile.

private static void createControlFile(FileSystem fs, // in MB 
int fileSize, int nrFiles) throws IOException {"creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");
    fs.delete(CONTROL_DIR, true);
    for (int i = 0; i < nrFiles; i++) {
        String name = getFileName(i);
        Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
            writer.append(new Text(name), new LongWritable(fileSize));
        } catch (Exception e) {
            throw new IOException(e.getLocalizedMessage());
        } finally {
            if (writer != null)
            writer = null;
    }"created control files for: " + nrFiles + " files");
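Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException) IOException(java.io.IOException)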


LongWritable ( Text ( Test (org.junit.Test)171 IntWritable ( Path (org.apache.hadoop.fs.Path)99 BytesWritable ( FloatWritable ( Configuration (org.apache.hadoop.conf.Configuration)62 DoubleWritable ( BooleanWritable ( ArrayList (java.util.ArrayList)59 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)57 ShortWritable ( IOException ( ByteWritable ( SequenceFile ( HiveDecimalWritable ( FileSystem (org.apache.hadoop.fs.FileSystem)37 JobConf (org.apache.hadoop.mapred.JobConf)37 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)35