Search in sources :

Example 31 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestTextInputFormat method testNewLines.

/**
 * Test readLine for various kinds of line termination sequences
 * ({@code \n}, {@code \r} and {@code \r\n}, including consecutive
 * terminators that yield empty lines).
 * Varies the buffer size from 1 up to the input length to stress test.
 * Also checks that the values returned by readLine add up to the byte
 * length of the whole input, and that reading past the last line
 * returns 0.
 *
 * @throws Exception
 */
@Test(timeout = 5000)
public void testNewLines() throws Exception {
    final String STR = "a\nbb\n\nccc\rdddd\r\r\r\n\r\neeeee";
    // Explicit charset: the literal is pure ASCII, so the expected byte
    // count must not depend on the platform default encoding.
    final int STRLENBYTES = STR.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
    Text out = new Text();
    for (int bufsz = 1; bufsz < STRLENBYTES + 1; ++bufsz) {
        LineReader in = makeStream(STR, bufsz);
        // Running total of the values returned by readLine.
        int c = 0;
        // "a"\n
        c += in.readLine(out);
        assertEquals("line1 length, bufsz:" + bufsz, 1, out.getLength());
        // "bb"\n
        c += in.readLine(out);
        assertEquals("line2 length, bufsz:" + bufsz, 2, out.getLength());
        // ""\n
        c += in.readLine(out);
        assertEquals("line3 length, bufsz:" + bufsz, 0, out.getLength());
        // "ccc"\r
        c += in.readLine(out);
        assertEquals("line4 length, bufsz:" + bufsz, 3, out.getLength());
        // "dddd"\r
        c += in.readLine(out);
        assertEquals("line5 length, bufsz:" + bufsz, 4, out.getLength());
        // ""\r
        c += in.readLine(out);
        assertEquals("line6 length, bufsz:" + bufsz, 0, out.getLength());
        // ""\r\n
        c += in.readLine(out);
        assertEquals("line7 length, bufsz:" + bufsz, 0, out.getLength());
        // ""\r\n
        c += in.readLine(out);
        assertEquals("line8 length, bufsz:" + bufsz, 0, out.getLength());
        // "eeeee"EOF
        c += in.readLine(out);
        assertEquals("line9 length, bufsz:" + bufsz, 5, out.getLength());
        assertEquals("end of file, bufsz: " + bufsz, 0, in.readLine(out));
        // Expected value first, actual second, so a failure is reported
        // the right way round.
        assertEquals("total bytes, bufsz: " + bufsz, STRLENBYTES, c);
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)

Example 32 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestTextInputFormat method testMaxLineLength.

/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also checks that the values returned by readLine add up to the byte
 * length of the whole input, and that reading past the last line
 * returns 0.
 * Varies the buffer size from 1 up to the input length to stress test.
 *
 * @throws Exception
 */
@Test(timeout = 5000)
public void testMaxLineLength() throws Exception {
    final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
    // Explicit charset: the literal is pure ASCII, so the expected byte
    // count must not depend on the platform default encoding.
    final int STRLENBYTES = STR.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
    Text out = new Text();
    for (int bufsz = 1; bufsz < STRLENBYTES + 1; ++bufsz) {
        LineReader in = makeStream(STR, bufsz);
        // Running total of the values returned by readLine.
        int c = 0;
        // "a"\n, limit 1: fits exactly
        c += in.readLine(out, 1);
        assertEquals("line1 length, bufsz: " + bufsz, 1, out.getLength());
        // "bb"\n, limit 1: clipped to one byte
        c += in.readLine(out, 1);
        assertEquals("line2 length, bufsz: " + bufsz, 1, out.getLength());
        // ""\n, limit 1: empty line
        c += in.readLine(out, 1);
        assertEquals("line3 length, bufsz: " + bufsz, 0, out.getLength());
        // "ccc"\r, limit 3: fits exactly
        c += in.readLine(out, 3);
        assertEquals("line4 length, bufsz: " + bufsz, 3, out.getLength());
        // "dddd"\r\n, limit 10: not clipped
        c += in.readLine(out, 10);
        assertEquals("line5 length, bufsz: " + bufsz, 4, out.getLength());
        // "eeeee"EOF, limit 8: not clipped
        c += in.readLine(out, 8);
        assertEquals("line6 length, bufsz: " + bufsz, 5, out.getLength());
        assertEquals("end of file, bufsz: " + bufsz, 0, in.readLine(out));
        // Expected value first, actual second, so a failure is reported
        // the right way round.
        assertEquals("total bytes, bufsz: " + bufsz, STRLENBYTES, c);
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)

Example 33 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestMRKeyValueTextInputFormat method testNewLines.

/**
 * Reads a fixed input containing {@code \n}, {@code \r} and
 * {@code \r\n} terminators line by line and checks the length of each
 * line, then checks that a further read returns 0.
 *
 * @throws Exception
 */
@Test
public void testNewLines() throws Exception {
    LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    // "a"\n
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    // "bb"\n
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    // ""\n
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    // "ccc"\r
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    // "dddd"\r\n
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    // "eeeee"EOF
    in.readLine(out);
    assertEquals("line6 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Test(org.junit.Test)

Example 34 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class Hadoop20JHParser method canParse.

/**
 * Decides whether this parser can handle the given stream.
 *
 * A stream is deemed a good 0.20 job history stream if its first line
 * is exactly {@code Meta VERSION="1" .}
 *
 * @param input the stream to probe; its first line is read through a
 *          LineReader
 * @return {@code true} only if reading the first line yields a
 *         non-zero result and the line text equals the expected
 *         header; {@code false} otherwise, including when an
 *         EOFException is raised while reading
 * @throws IOException if reading fails for any reason other than an
 *           EOFException
 */
public static boolean canParse(InputStream input) throws IOException {
    final String expectedHeader = "Meta VERSION=\"1\" .";
    try {
        // NOTE(review): the reader is not closed here, presumably
        // because the caller still owns the stream; confirm.
        LineReader headerReader = new LineReader(input);
        Text firstLine = new Text();
        int readResult = headerReader.readLine(firstLine);
        if (readResult == 0) {
            // Nothing could be read, so there is no header to compare.
            return false;
        }
        return firstLine.toString().equals(expectedHeader);
    } catch (EOFException e) {
        return false;
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) EOFException(java.io.EOFException) Text(org.apache.hadoop.io.Text)

Example 35 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class KeyOnlyTextOutputReader method initialize.

/**
 * Prepares this reader for use with the given PipeMapRed.
 *
 * Delegates to the superclass first, then records the client input and
 * configuration obtained from {@code pipeMapRed}, wraps the client
 * input (cast to an InputStream) in a LineReader built with that
 * configuration, and allocates fresh Text buffers for the key and the
 * line.
 *
 * @param pipeMapRed source of the client input and the configuration
 * @throws IOException declared by the overridden method
 */
@Override
public void initialize(PipeMapRed pipeMapRed) throws IOException {
    super.initialize(pipeMapRed);

    // Fresh, empty buffers; these do not depend on pipeMapRed.
    this.key = new Text();
    this.line = new Text();

    this.clientIn = pipeMapRed.getClientInput();
    this.conf = pipeMapRed.getConfiguration();

    InputStream rawInput = (InputStream) this.clientIn;
    this.lineReader = new LineReader(rawInput, this.conf);
}
Also used : InputStream(java.io.InputStream) LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)

Aggregations

LineReader (org.apache.hadoop.util.LineReader): 36; Text (org.apache.hadoop.io.Text): 31; Path (org.apache.hadoop.fs.Path): 15; FileSystem (org.apache.hadoop.fs.FileSystem): 14; FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 11; Test (org.junit.Test): 10; Configuration (org.apache.hadoop.conf.Configuration): 5; FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4; Counters (org.apache.hadoop.mapred.Counters): 4; JobConf (org.apache.hadoop.mapred.JobConf): 4; RunningJob (org.apache.hadoop.mapred.RunningJob): 4; IOException (java.io.IOException): 3; ArrayList (java.util.ArrayList): 3; CommandLine (org.apache.commons.cli.CommandLine): 3; CommandLineParser (org.apache.commons.cli.CommandLineParser): 3; GnuParser (org.apache.commons.cli.GnuParser): 3; HelpFormatter (org.apache.commons.cli.HelpFormatter): 3; Options (org.apache.commons.cli.Options): 3; ParseException (org.apache.commons.cli.ParseException): 3; CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3