Example 6 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestTextInputFormat method main.

/**
   * Parse the command line arguments into lines and display the result.
   * @param args
   * @throws Exception
   */
public static void main(String[] args) throws Exception {
    for (String arg : args) {
        System.out.println("Working on " + arg);
        LineReader reader = makeStream(unquote(arg));
        Text line = new Text();
        int size = reader.readLine(line);
        while (size > 0) {
            System.out.println("Got: " + line.toString());
            size = reader.readLine(line);
        }
        reader.close();
    }
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)
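
This snippet relies on two private helpers from the same test class, makeStream and unquote, which the excerpt does not show; unquote presumably expands escape sequences in the argument. A minimal sketch of what makeStream plausibly looks like, assuming the argument is wrapped as a UTF-8 byte stream (an illustration, not the verbatim Hadoop helper):

// Hypothetical sketch: wrap a String's UTF-8 bytes in a LineReader.
// Assumes java.io.ByteArrayInputStream, java.nio.charset.StandardCharsets,
// and org.apache.hadoop.conf.Configuration are imported.
private static LineReader makeStream(String str) throws IOException {
    return new LineReader(
        new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)),
        new Configuration());
}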

Example 7 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TextOutputReader method initialize.

@Override
public void initialize(PipeMapRed pipeMapRed) throws IOException {
    super.initialize(pipeMapRed);
    clientIn = pipeMapRed.getClientInput();
    conf = pipeMapRed.getConfiguration();
    numKeyFields = pipeMapRed.getNumOfKeyFields();
    separator = pipeMapRed.getFieldSeparator();
    lineReader = new LineReader((InputStream) clientIn, conf);
    key = new Text();
    value = new Text();
    line = new Text();
}
Also used : InputStream(java.io.InputStream) LineReader(org.apache.hadoop.util.LineReader) Text(org.apache.hadoop.io.Text)
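
The fields initialized here (lineReader, separator, numKeyFields, key, value, line) feed the reader's next step, which splits each line read from lineReader into a key and a value at the configured separator. A rough sketch of that splitting step for the simple one-key-field case (a hypothetical simplification, not the verbatim Hadoop implementation):

// Hypothetical sketch: split a line at the first occurrence of the field
// separator; if none is found, the whole line becomes the key.
// Assumes org.apache.hadoop.util.UTF8ByteArrayUtils is imported and that
// separator is the byte[] returned by PipeMapRed.getFieldSeparator().
private void splitKeyVal(byte[] line, int length) {
    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
    if (pos == -1) {
        key.set(line, 0, length);
        value.set("");
    } else {
        key.set(line, 0, pos);
        value.set(line, pos + separator.length, length - pos - separator.length);
    }
}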

Example 8 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestMRKeyValueTextInputFormat method testUTF8.

@Test
public void testUTF8() throws Exception {
    LineReader in = makeStream("abcd€bdcd€");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters", "abcd€bdcd€", line.toString());
    in = makeStream("abc xyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc xyz", line.toString());
}
Also used : LineReader(org.apache.hadoop.util.LineReader) Test(org.junit.Test)
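
As in Example 6, makeStream is a private helper of the test class. The two assertions verify that readLine neither corrupts multi-byte UTF-8 characters nor treats a plain space as a line terminator. The first check, rewritten standalone with the stream wrapping assumed as in the Example 6 sketch:

// Standalone version of the first assertion; the string and the expected
// value come from the test above, the stream wrapping is an assumption.
LineReader in = new LineReader(
    new ByteArrayInputStream("abcd€bdcd€".getBytes(StandardCharsets.UTF_8)));
Text line = new Text();
in.readLine(line);
assertEquals("readLine changed utf8 characters", "abcd€bdcd€", line.toString());
in.close();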

Example 9 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class RoundRobinUserResolver method parseUserList.

/**
   * Parse the user list, which is assumed to contain one user per line.
   * Each line in the users-list file is of the form <username>[,group]*
   * <br> Group names are ignored (they are not parsed at all).
   */
private List<UserGroupInformation> parseUserList(URI userUri, Configuration conf) throws IOException {
    if (null == userUri) {
        return Collections.emptyList();
    }
    final Path userloc = new Path(userUri.toString());
    final Text rawUgi = new Text();
    final FileSystem fs = userloc.getFileSystem(conf);
    final ArrayList<UserGroupInformation> ugiList = new ArrayList<UserGroupInformation>();
    LineReader in = null;
    try {
        in = new LineReader(fs.open(userloc));
        while (in.readLine(rawUgi) > 0) {
            // line is of the form username[,group]*
            if (rawUgi.toString().trim().equals("")) {
                // continue on empty line
                continue;
            }
            // e is end position of user name in this line
            int e = rawUgi.find(",");
            if (e == 0) {
                throw new IOException("Missing username: " + rawUgi);
            }
            if (e == -1) {
                e = rawUgi.getLength();
            }
            final String username = Text.decode(rawUgi.getBytes(), 0, e).trim();
            UserGroupInformation ugi = null;
            try {
                ugi = UserGroupInformation.createProxyUser(username, UserGroupInformation.getLoginUser());
            } catch (IOException ioe) {
                LOG.error("Error while creating a proxy user ", ioe);
            }
            if (ugi != null) {
                ugiList.add(ugi);
            }
        // No need to parse groups, even if they exist. Go to next line
        }
    } finally {
        if (in != null) {
            in.close();
        }
    }
    return ugiList;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
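
To make the parsing concrete: for a line such as user1,groupA,groupB, the loop finds the first comma, decodes the bytes before it, and trims the result; everything after the comma is ignored. A short walk-through of just that extraction (the sample line is invented):

// Hypothetical walk-through of the username extraction for one sample line.
// Text.decode throws CharacterCodingException (an IOException subclass), so
// this belongs in a method that declares throws IOException.
Text rawUgi = new Text("user1,groupA,groupB");
// e is the byte offset of the first comma: 5, the end of the username
int e = rawUgi.find(",");
if (e == -1) {
    // a line with no groups: the whole line is the username
    e = rawUgi.getLength();
}
String username = Text.decode(rawUgi.getBytes(), 0, e).trim();  // "user1"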

Example 10 with LineReader

use of org.apache.hadoop.util.LineReader in project hadoop by apache.

the class TestConcatenatedCompressedInput method testBuiltInGzipDecompressor.

/**
   * Test using the new BuiltInGzipDecompressor codec for reading gzip files.
   */
// NOTE:  This fails on RHEL4 with "java.io.IOException: header crc mismatch"
//        due to buggy version of zlib (1.2.1.2) included.
@Test
public void testBuiltInGzipDecompressor() throws IOException {
    JobConf jobConf = new JobConf(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, jobConf);
    localFs.delete(workDir, true);
    // Don't use native libs for this test
    ZlibFactory.setNativeZlibLoaded(false);
    assertEquals("[non-native (Java) codec]", org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.class, gzip.getDecompressorType());
    System.out.println(COLOR_BR_YELLOW + "testBuiltInGzipDecompressor() using" + " non-native (Java Inflater) Decompressor (" + gzip.getDecompressorType() + ")" + COLOR_NORMAL);
    // copy single-member test file to HDFS
    String fn1 = "testConcatThenCompress.txt" + gzip.getDefaultExtension();
    Path fnLocal1 = new Path(System.getProperty("test.concat.data", "/tmp"), fn1);
    Path fnHDFS1 = new Path(workDir, fn1);
    localFs.copyFromLocalFile(fnLocal1, fnHDFS1);
    // copy multiple-member test file to HDFS
    // (actually in "seekable gzip" format, a la JIRA PIG-42)
    String fn2 = "testCompressThenConcat.txt" + gzip.getDefaultExtension();
    Path fnLocal2 = new Path(System.getProperty("test.concat.data", "/tmp"), fn2);
    Path fnHDFS2 = new Path(workDir, fn2);
    localFs.copyFromLocalFile(fnLocal2, fnHDFS2);
    FileInputFormat.setInputPaths(jobConf, workDir);
    // here's first pair of DecompressorStreams:
    final FileInputStream in1 = new FileInputStream(fnLocal1.toString());
    final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
    assertEquals("concat bytes available", 2734, in1.available());
    // w/hdr CRC
    assertEquals("concat bytes available", 3413, in2.available());
    CompressionInputStream cin2 = gzip.createInputStream(in2);
    LineReader in = new LineReader(cin2);
    Text out = new Text();
    int numBytes, totalBytes = 0, lineNum = 0;
    while ((numBytes = in.readLine(out)) > 0) {
        ++lineNum;
        totalBytes += numBytes;
    }
    in.close();
    assertEquals("total uncompressed bytes in concatenated test file", 5346, totalBytes);
    assertEquals("total uncompressed lines in concatenated test file", 84, lineNum);
    ZlibFactory.loadNativeZLib();
    // test GzipZlibDecompressor (native), just to be sure
    // (FIXME?  could move this call to testGzip(), but would need filename
    // setup above) (alternatively, maybe just nuke testGzip() and extend this?)
    doMultipleGzipBufferSizes(jobConf, true);
}
Also used : Path(org.apache.hadoop.fs.Path) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) Text(org.apache.hadoop.io.Text) FileInputStream(java.io.FileInputStream) LineReader(org.apache.hadoop.util.LineReader) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Test(org.junit.Test)
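
Stripped of the test fixtures, the core LineReader pattern in this test is small: obtain a CompressionInputStream from the codec, wrap it in a LineReader, and loop on readLine until it returns 0. A minimal sketch of that pattern on its own (the file path is a placeholder and the plain Configuration is an assumption):

// Minimal sketch: count lines and uncompressed bytes in a local gzip file.
Configuration conf = new Configuration();
CompressionCodec gzip = ReflectionUtils.newInstance(GzipCodec.class, conf);
try (FileInputStream raw = new FileInputStream("/tmp/input.gz");
        CompressionInputStream cin = gzip.createInputStream(raw)) {
    LineReader in = new LineReader(cin);
    Text line = new Text();
    int numBytes, totalBytes = 0, lineNum = 0;
    while ((numBytes = in.readLine(line)) > 0) {
        ++lineNum;
        totalBytes += numBytes;
    }
    System.out.println(lineNum + " lines, " + totalBytes + " uncompressed bytes");
}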

Aggregations

LineReader (org.apache.hadoop.util.LineReader) 36
Text (org.apache.hadoop.io.Text) 31
Path (org.apache.hadoop.fs.Path) 15
FileSystem (org.apache.hadoop.fs.FileSystem) 14
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 11
Test (org.junit.Test) 10
Configuration (org.apache.hadoop.conf.Configuration) 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 4
Counters (org.apache.hadoop.mapred.Counters) 4
JobConf (org.apache.hadoop.mapred.JobConf) 4
RunningJob (org.apache.hadoop.mapred.RunningJob) 4
IOException (java.io.IOException) 3
ArrayList (java.util.ArrayList) 3
CommandLine (org.apache.commons.cli.CommandLine) 3
CommandLineParser (org.apache.commons.cli.CommandLineParser) 3
GnuParser (org.apache.commons.cli.GnuParser) 3
HelpFormatter (org.apache.commons.cli.HelpFormatter) 3
Options (org.apache.commons.cli.Options) 3
ParseException (org.apache.commons.cli.ParseException) 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec) 3