Search in sources :

Example 11 with LineReader

Use of org.apache.hadoop.util.LineReader in the project hadoop by apache.

The class TestConcatenatedCompressedInput, method main.

/**
   * Parse each command-line argument as an input source, read it line by
   * line with a {@code LineReader}, and print every line to stdout.
   *
   * @param args paths/specs of inputs to read; each is unquoted and opened
   *             via {@code makeStream} (helper defined elsewhere in this class)
   * @throws Exception if a stream cannot be opened or read
   */
public static void main(String[] args) throws Exception {
    for (String arg : args) {
        System.out.println("Working on " + arg);
        LineReader reader = makeStream(unquote(arg));
        try {
            Text line = new Text();
            // readLine returns the number of bytes consumed; 0 means EOF.
            int size = reader.readLine(line);
            while (size > 0) {
                System.out.println("Got: " + line.toString());
                size = reader.readLine(line);
            }
        } finally {
            // Close even if readLine throws; the original leaked the stream
            // on error.
            reader.close();
        }
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)

Example 12 with LineReader

Use of org.apache.hadoop.util.LineReader in the project hadoop by apache.

The class TestKeyValueTextInputFormat, method testNewLines.

/**
 * Verifies that LineReader splits lines on all three newline conventions
 * ({@code \n}, {@code \r}, and {@code \r\n}) and reports the correct
 * line lengths, including a zero-length line for a blank line.
 */
@Test
public void testNewLines() throws Exception {
    LineReader in = null;
    try {
        // Input mixes \n, an empty line, a bare \r, and a \r\n terminator.
        in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
        Text out = new Text();
        in.readLine(out);
        assertEquals("line1 length", 1, out.getLength());
        in.readLine(out);
        assertEquals("line2 length", 2, out.getLength());
        in.readLine(out);
        // The blank line between "bb" and "ccc" must yield length 0.
        assertEquals("line3 length", 0, out.getLength());
        in.readLine(out);
        assertEquals("line4 length", 3, out.getLength());
        in.readLine(out);
        assertEquals("line5 length", 4, out.getLength());
        in.readLine(out);
        // Fixed copy-paste bug: this message previously repeated
        // "line5 length" for the sixth line.
        assertEquals("line6 length", 5, out.getLength());
        // readLine returns 0 bytes consumed at end of file.
        assertEquals("end of file", 0, in.readLine(out));
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Test(org.junit.Test)

Example 13 with LineReader

Use of org.apache.hadoop.util.LineReader in the project hadoop by apache.

The class TestKeyValueTextInputFormat, method testUTF8.

/**
 * Verifies that LineReader neither corrupts multi-byte UTF-8 characters
 * nor splits lines on anything other than real newline characters.
 */
@Test
public void testUTF8() throws Exception {
    LineReader in = null;
    try {
        in = makeStream("abcd€bdcd€");
        Text line = new Text();
        in.readLine(line);
        assertEquals("readLine changed utf8 characters", "abcd€bdcd€", line.toString());
        // Close the first stream before reassigning 'in'; the original
        // leaked it because the finally block only closes the last reader.
        in.close();
        in = makeStream("abc xyz");
        in.readLine(line);
        // A space must not be treated as a line terminator.
        assertEquals("split on fake newline", "abc xyz", line.toString());
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Test(org.junit.Test)

Example 14 with LineReader

Use of org.apache.hadoop.util.LineReader in the project Cloud9 by lintool.

The class IT, method testDemoCountDocs.

/**
 * Integration test: runs the CountTrecDocuments job via "hadoop jar",
 * then reads the single-line count file it writes and asserts the
 * expected document count for the TREC collection.
 */
private void testDemoCountDocs() throws Exception {
    Configuration conf = IntegrationUtils.getBespinConfiguration();
    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.exists(collectionPath));
    String output = tmpPrefix + "-cnt";
    String records = tmpPrefix + "-records.txt";
    String[] args = new String[] { "hadoop jar", IntegrationUtils.getJar("target", "cloud9"), edu.umd.cloud9.collection.trec.CountTrecDocuments.class.getCanonicalName(), "-collection=" + collectionPath, "-output=" + output, "-docnoMapping=" + mappingFile, "-countOutput=" + records };
    IntegrationUtils.exec(Joiner.on(" ").join(args));
    LineReader reader = new LineReader(fs.open(new Path(records)));
    Text str = new Text();
    try {
        // The job writes the total document count as the first line.
        reader.readLine(str);
    } finally {
        // Close even if readLine throws; the original leaked on error.
        reader.close();
    }
    assertEquals(472525, Integer.parseInt(str.toString()));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)

Example 15 with LineReader

Use of org.apache.hadoop.util.LineReader in the project Cloud9 by lintool.

The class IT, method testDemoCountDocsRaw.

/**
 * Integration test: runs the CountClueWarcRecords job over the raw
 * ClueWeb segment via "hadoop jar", then reads the single-line count
 * file it writes and asserts the expected record count.
 */
private void testDemoCountDocsRaw() throws Exception {
    Configuration conf = IntegrationUtils.getBespinConfiguration();
    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.exists(collectionPathRaw));
    String records = tmpPrefix + "-records.txt";
    String[] args = new String[] { "hadoop jar", IntegrationUtils.getJar("target", "cloud9"), edu.umd.cloud9.collection.clue.CountClueWarcRecords.class.getCanonicalName(), "-original", "-segment=1", "-path=" + collectionPathRaw, "-docnoMapping=" + mappingFile, "-countOutput=" + records };
    IntegrationUtils.exec(Joiner.on(" ").join(args));
    LineReader reader = new LineReader(fs.open(new Path(records)));
    Text str = new Text();
    try {
        // The job writes the total record count as the first line.
        reader.readLine(str);
    } finally {
        // Close even if readLine throws; the original leaked on error.
        reader.close();
    }
    assertEquals(50220423, Integer.parseInt(str.toString()));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)

Aggregations

LineReader (org.apache.hadoop.util.LineReader)36 Text (org.apache.hadoop.io.Text)31 Path (org.apache.hadoop.fs.Path)15 FileSystem (org.apache.hadoop.fs.FileSystem)14 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)11 Test (org.junit.Test)10 Configuration (org.apache.hadoop.conf.Configuration)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)4 Counters (org.apache.hadoop.mapred.Counters)4 JobConf (org.apache.hadoop.mapred.JobConf)4 RunningJob (org.apache.hadoop.mapred.RunningJob)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 CommandLine (org.apache.commons.cli.CommandLine)3 CommandLineParser (org.apache.commons.cli.CommandLineParser)3 GnuParser (org.apache.commons.cli.GnuParser)3 HelpFormatter (org.apache.commons.cli.HelpFormatter)3 Options (org.apache.commons.cli.Options)3 ParseException (org.apache.commons.cli.ParseException)3 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)3