use of org.apache.hadoop.util.LineReader in project hadoop by apache.
the class TestTextInputFormat method main.
/**
* Parse the command line arguments into lines and display the result.
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
  for (String arg : args) {
    System.out.println("Working on " + arg);
    LineReader reader = makeStream(unquote(arg));
    Text line = new Text();
    int size = reader.readLine(line);
    while (size > 0) {
      System.out.println("Got: " + line.toString());
      size = reader.readLine(line);
    }
    reader.close();
  }
}
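The helpers makeStream and unquote are not shown in this snippet. As a rough, self-contained illustration of the same read loop, here is a minimal sketch that wraps the UTF-8 bytes of a string in a LineReader; the makeStream below is a hypothetical stand-in, not the test's actual helper.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class LineReaderStringDemo {

  // Hypothetical stand-in for the test's makeStream helper:
  // wraps the UTF-8 bytes of a string in a LineReader.
  static LineReader makeStream(String str) {
    return new LineReader(
        new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)));
  }

  public static void main(String[] args) throws IOException {
    LineReader reader = makeStream("first line\nsecond line\n");
    Text line = new Text();
    // readLine returns the number of bytes consumed (including the line
    // terminator); 0 signals end of stream.
    while (reader.readLine(line) > 0) {
      System.out.println("Got: " + line);
    }
    reader.close();
  }
}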
use of org.apache.hadoop.util.LineReader in project hadoop by apache.
the class TextOutputReader method initialize.
@Override
public void initialize(PipeMapRed pipeMapRed) throws IOException {
  super.initialize(pipeMapRed);
  clientIn = pipeMapRed.getClientInput();
  conf = pipeMapRed.getConfiguration();
  numKeyFields = pipeMapRed.getNumOfKeyFields();
  separator = pipeMapRed.getFieldSeparator();
  lineReader = new LineReader((InputStream) clientIn, conf);
  key = new Text();
  value = new Text();
  line = new Text();
}
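The (InputStream, Configuration) constructor used here sizes the reader's internal buffer from io.file.buffer.size rather than the built-in default. A minimal sketch of that constructor in isolation, assuming a ByteArrayInputStream as a stand-in for the streaming process's output:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class ConfiguredLineReaderDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // LineReader(InputStream, Configuration) reads its buffer size
    // from io.file.buffer.size.
    conf.setInt("io.file.buffer.size", 8 * 1024);

    byte[] data = "key1\tvalue1\nkey2\tvalue2\n".getBytes(StandardCharsets.UTF_8);
    LineReader lineReader = new LineReader(new ByteArrayInputStream(data), conf);

    Text line = new Text();
    while (lineReader.readLine(line) > 0) {
      System.out.println(line);
    }
    lineReader.close();
  }
}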
use of org.apache.hadoop.util.LineReader in project hadoop by apache.
the class TestMRKeyValueTextInputFormat method testUTF8.
@Test
public void testUTF8() throws Exception {
  LineReader in = makeStream("abcd€bdcd€");
  Text line = new Text();
  in.readLine(line);
  assertEquals("readLine changed utf8 characters", "abcd€bdcd€", line.toString());
  in = makeStream("abc xyz");
  in.readLine(line);
  assertEquals("split on fake newline", "abc xyz", line.toString());
}
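LineReader operates on bytes and returns byte counts, while Text decodes and re-encodes UTF-8, so multi-byte characters such as the euro sign survive the round trip unchanged. A small self-contained sketch of the same idea (the 14-byte count assumes each euro sign occupies three bytes in UTF-8):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class Utf8LineDemo {
  public static void main(String[] args) throws IOException {
    String input = "abcd\u20acbdcd\u20ac";  // two 3-byte euro signs
    LineReader in = new LineReader(
        new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
    Text line = new Text();
    int bytesRead = in.readLine(line);
    // Prints "14 bytes -> abcd€bdcd€": the byte count reflects the UTF-8
    // encoding, while toString() recovers the original characters.
    System.out.println(bytesRead + " bytes -> " + line);
    in.close();
  }
}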
use of org.apache.hadoop.util.LineReader in project hadoop by apache.
the class RoundRobinUserResolver method parseUserList.
/**
 * Userlist assumes one user per line.
 * Each line in users-list-file is of the form <username>[,group]*
 * <br> Group names are ignored (they are not parsed at all).
 */
private List<UserGroupInformation> parseUserList(URI userUri, Configuration conf) throws IOException {
  if (null == userUri) {
    return Collections.emptyList();
  }
  final Path userloc = new Path(userUri.toString());
  final Text rawUgi = new Text();
  final FileSystem fs = userloc.getFileSystem(conf);
  final ArrayList<UserGroupInformation> ugiList = new ArrayList<UserGroupInformation>();
  LineReader in = null;
  try {
    in = new LineReader(fs.open(userloc));
    while (in.readLine(rawUgi) > 0) {
      // line is of the form username[,group]*
      if (rawUgi.toString().trim().equals("")) {
        // continue on empty line
        continue;
      }
      // e is the end position of the user name in this line
      int e = rawUgi.find(",");
      if (e == 0) {
        throw new IOException("Missing username: " + rawUgi);
      }
      if (e == -1) {
        e = rawUgi.getLength();
      }
      final String username = Text.decode(rawUgi.getBytes(), 0, e).trim();
      UserGroupInformation ugi = null;
      try {
        ugi = UserGroupInformation.createProxyUser(username, UserGroupInformation.getLoginUser());
      } catch (IOException ioe) {
        LOG.error("Error while creating a proxy user ", ioe);
      }
      if (ugi != null) {
        ugiList.add(ugi);
      }
      // No need to parse groups, even if they exist. Go to next line.
    }
  } finally {
    if (in != null) {
      in.close();
    }
  }
  return ugiList;
}
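The username extraction relies on Text.find(",") returning the byte offset of the first comma (or -1 when the line has no groups) and on Text.decode to turn that byte prefix back into a String. A small sketch of just that slice of the logic, using the made-up line "alice,staff,admins":

import java.io.IOException;

import org.apache.hadoop.io.Text;

public class UserLineParseDemo {
  public static void main(String[] args) throws IOException {
    // Illustrative input only: one line of a users-list file,
    // of the form <username>[,group]*.
    Text rawUgi = new Text("alice,staff,admins");
    // Byte offset of the end of the username; -1 means no groups on the line.
    int e = rawUgi.find(",");
    if (e == -1) {
      e = rawUgi.getLength();
    }
    String username = Text.decode(rawUgi.getBytes(), 0, e).trim();
    System.out.println(username);  // prints "alice"
  }
}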
use of org.apache.hadoop.util.LineReader in project hadoop by apache.
the class TestConcatenatedCompressedInput method testBuiltInGzipDecompressor.
/**
* Test using the new BuiltInGzipDecompressor codec for reading gzip files.
*/
// NOTE: This fails on RHEL4 with "java.io.IOException: header crc mismatch"
// due to buggy version of zlib (1.2.1.2) included.
@Test
public void testBuiltInGzipDecompressor() throws IOException {
  JobConf jobConf = new JobConf(defaultConf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, jobConf);
  localFs.delete(workDir, true);
  // Don't use native libs for this test
  ZlibFactory.setNativeZlibLoaded(false);
  assertEquals("[non-native (Java) codec]",
      org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.class,
      gzip.getDecompressorType());
  System.out.println(COLOR_BR_YELLOW + "testBuiltInGzipDecompressor() using"
      + " non-native (Java Inflater) Decompressor ("
      + gzip.getDecompressorType() + ")" + COLOR_NORMAL);
  // copy single-member test file to HDFS
  String fn1 = "testConcatThenCompress.txt" + gzip.getDefaultExtension();
  Path fnLocal1 = new Path(System.getProperty("test.concat.data", "/tmp"), fn1);
  Path fnHDFS1 = new Path(workDir, fn1);
  localFs.copyFromLocalFile(fnLocal1, fnHDFS1);
  // copy multiple-member test file to HDFS
  // (actually in "seekable gzip" format, a la JIRA PIG-42)
  String fn2 = "testCompressThenConcat.txt" + gzip.getDefaultExtension();
  Path fnLocal2 = new Path(System.getProperty("test.concat.data", "/tmp"), fn2);
  Path fnHDFS2 = new Path(workDir, fn2);
  localFs.copyFromLocalFile(fnLocal2, fnHDFS2);
  FileInputFormat.setInputPaths(jobConf, workDir);
  // here's first pair of DecompressorStreams:
  final FileInputStream in1 = new FileInputStream(fnLocal1.toString());
  final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
  assertEquals("concat bytes available", 2734, in1.available());
  // w/hdr CRC
  assertEquals("concat bytes available", 3413, in2.available());
  CompressionInputStream cin2 = gzip.createInputStream(in2);
  LineReader in = new LineReader(cin2);
  Text out = new Text();
  int numBytes, totalBytes = 0, lineNum = 0;
  while ((numBytes = in.readLine(out)) > 0) {
    ++lineNum;
    totalBytes += numBytes;
  }
  in.close();
  assertEquals("total uncompressed bytes in concatenated test file", 5346, totalBytes);
  assertEquals("total uncompressed lines in concatenated test file", 84, lineNum);
  ZlibFactory.loadNativeZLib();
  // test GzipZlibDecompressor (native), just to be sure
  // (FIXME? could move this call to testGzip(), but would need filename
  // setup above) (alternatively, maybe just nuke testGzip() and extend this?)
  doMultipleGzipBufferSizes(jobConf, true);
}
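Stripped of the HDFS setup, fixed byte counts, and native-library toggling, the core of this test is: create a CompressionInputStream from the gzip codec, wrap it in a LineReader, and tally lines and uncompressed bytes. A minimal sketch of that pattern against a locally written gzip file (the /tmp path and sample contents are placeholders):

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.ReflectionUtils;

public class GzipLineCountDemo {
  public static void main(String[] args) throws IOException {
    // Placeholder path; write a tiny gzip file so there is something to read back.
    String path = "/tmp/linereader-demo.txt.gz";
    try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(path))) {
      gzOut.write("line one\nline two\nline three\n".getBytes(StandardCharsets.UTF_8));
    }

    Configuration conf = new Configuration();
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, conf);

    // Decompress, then count lines and uncompressed bytes, as in the test.
    CompressionInputStream cin = gzip.createInputStream(new FileInputStream(path));
    LineReader in = new LineReader(cin);
    Text line = new Text();
    int numBytes, totalBytes = 0, lineNum = 0;
    while ((numBytes = in.readLine(line)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    System.out.println(lineNum + " lines, " + totalBytes + " uncompressed bytes");
  }
}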