Use of org.apache.hadoop.util.LineReader in the Apache Hadoop project.
Class TestConcatenatedCompressedInput, method main.
/**
 * Treats every command line argument as a line-oriented input and prints its lines.
 *
 * <p>Each argument is passed through {@code unquote} and then {@code makeStream} to obtain a
 * {@link LineReader}. Lines are printed until {@code readLine} returns a value that is not
 * greater than zero.
 *
 * @param args the inputs to read; one reader is created per argument
 * @throws Exception if creating, reading from, or closing a reader fails
 */
public static void main(String[] args) throws Exception {
  for (String arg : args) {
    System.out.println("Working on " + arg);
    LineReader reader = makeStream(unquote(arg));
    try {
      Text line = new Text();
      int size = reader.readLine(line);
      while (size > 0) {
        System.out.println("Got: " + line.toString());
        size = reader.readLine(line);
      }
    } finally {
      // Close even when reading or printing throws, so the reader is never leaked.
      reader.close();
    }
  }
}
Use of org.apache.hadoop.util.LineReader in the Apache Hadoop project.
Class TestKeyValueTextInputFormat, method testNewLines.
/**
 * Reads a fixed input containing {@code \n}, {@code \r} and {@code \r\n} separators line by line
 * and checks the length of each line, then checks that a further read returns 0.
 *
 * @throws Exception if the reader cannot be created, read, or closed
 */
@Test
public void testNewLines() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    // This is the sixth line; the label previously repeated "line5", which made a failure
    // here indistinguishable from a failure on the previous assertion.
    assertEquals("line6 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
Use of org.apache.hadoop.util.LineReader in the Apache Hadoop project.
Class TestKeyValueTextInputFormat, method testUTF8.
/**
 * Checks that {@code readLine} returns two sample inputs unchanged: one containing multi-byte
 * characters, and one containing a separator character that must not be treated as a line break.
 *
 * @throws Exception if a reader cannot be created, read, or closed
 */
@Test
public void testUTF8() throws Exception {
  LineReader in = null;
  try {
    in = makeStream("abcd€bdcd€");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters", "abcd€bdcd€", line.toString());
    // Release the first reader before replacing it; otherwise only the second reader
    // would be closed by the finally block and the first would leak.
    in.close();
    in = null;
    in = makeStream("abc xyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc xyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
Use of org.apache.hadoop.util.LineReader in the Cloud9 project by lintool.
Class IT, method testDemoCountDocs.
/**
 * Runs {@code CountTrecDocuments} through {@code IntegrationUtils.exec} and checks that the first
 * line of the count output file parses to the expected number of documents.
 *
 * @throws Exception if the file system, the external command, or reading the output fails
 */
private void testDemoCountDocs() throws Exception {
  Configuration conf = IntegrationUtils.getBespinConfiguration();
  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(collectionPath));
  String output = tmpPrefix + "-cnt";
  String records = tmpPrefix + "-records.txt";
  String[] args = new String[] {
      "hadoop jar",
      IntegrationUtils.getJar("target", "cloud9"),
      edu.umd.cloud9.collection.trec.CountTrecDocuments.class.getCanonicalName(),
      "-collection=" + collectionPath,
      "-output=" + output,
      "-docnoMapping=" + mappingFile,
      "-countOutput=" + records };
  IntegrationUtils.exec(Joiner.on(" ").join(args));
  Text str = new Text();
  LineReader reader = new LineReader(fs.open(new Path(records)));
  try {
    reader.readLine(str);
  } finally {
    // Close even if readLine throws, so the opened file stream is not leaked.
    reader.close();
  }
  assertEquals(472525, Integer.parseInt(str.toString()));
}
Use of org.apache.hadoop.util.LineReader in the Cloud9 project by lintool.
Class IT, method testDemoCountDocsRaw.
/**
 * Runs {@code CountClueWarcRecords} through {@code IntegrationUtils.exec} and checks that the
 * first line of the count output file parses to the expected number of records.
 *
 * @throws Exception if the file system, the external command, or reading the output fails
 */
private void testDemoCountDocsRaw() throws Exception {
  Configuration conf = IntegrationUtils.getBespinConfiguration();
  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(collectionPathRaw));
  String records = tmpPrefix + "-records.txt";
  String[] args = new String[] {
      "hadoop jar",
      IntegrationUtils.getJar("target", "cloud9"),
      edu.umd.cloud9.collection.clue.CountClueWarcRecords.class.getCanonicalName(),
      "-original",
      "-segment=1",
      "-path=" + collectionPathRaw,
      "-docnoMapping=" + mappingFile,
      "-countOutput=" + records };
  IntegrationUtils.exec(Joiner.on(" ").join(args));
  Text str = new Text();
  LineReader reader = new LineReader(fs.open(new Path(records)));
  try {
    reader.readLine(str);
  } finally {
    // Close even if readLine throws, so the opened file stream is not leaked.
    reader.close();
  }
  assertEquals(50220423, Integer.parseInt(str.toString()));
}
Aggregations