Use of org.apache.hadoop.io.WritableComparable in the project Cloud9 by lintool.
From the class CreateMetadata, method GenerateMetadata.
/**
 * Scans a bitext stored as sequence files and writes summary metadata for it.
 *
 * <p>Every (IntWritable, PhrasePair) record under {@code bitextPath} is read. Three
 * figures are collected: the number of records, the largest word id found on the E side
 * ({@code value.getE().getWords()}) and the largest word id found on the F side
 * ({@code value.getF().getWords()}). Both maxima start at 0, so they are never negative.
 * The figures are wrapped in a {@code Metadata} object that is written to
 * {@code resultPath} using Java object serialization.
 *
 * @param bitextPath location of the sequence-file output to scan
 * @param resultPath file to create for the serialized {@code Metadata}
 * @throws IOException if the readers cannot be opened or the metadata cannot be written
 * @throws RuntimeException if reading a record fails; the underlying IOException is the cause
 */
public static void GenerateMetadata(Path bitextPath, Path resultPath) throws IOException {
    System.out.println(bitextPath.toString());
    JobConf conf = new JobConf(CreateMetadata.class);
    FileSystem fileSys = FileSystem.get(conf);
    // Read from the path the caller supplied rather than a hard-coded location.
    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(conf, bitextPath);
    WritableComparable key = new IntWritable();
    PhrasePair value = new PhrasePair();
    int sc = 0; // number of records seen
    int ec = 0; // largest word id on the E side
    int fc = 0; // largest word id on the F side
    try {
        for (SequenceFile.Reader reader : readers) {
            while (reader.next(key, value)) {
                sc = sc + 1;
                for (int word : value.getE().getWords()) {
                    if (word > ec) {
                        ec = word;
                    }
                }
                for (int word : value.getF().getWords()) {
                    if (word > fc) {
                        fc = word;
                    }
                }
            }
        }
    } catch (IOException e) {
        // Same exception type as before, but keep the cause so the stack trace survives.
        throw new RuntimeException("IO exception: " + e.getMessage(), e);
    } finally {
        for (SequenceFile.Reader reader : readers) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // The readers are input-only; a failed close cannot lose data and must
                // not mask an exception raised while reading.
            }
        }
    }
    Metadata theMetadata = new Metadata(sc, ec, fc);
    ObjectOutputStream mdstream =
            new ObjectOutputStream(new BufferedOutputStream(fileSys.create(resultPath)));
    try {
        mdstream.writeObject(theMetadata);
    } finally {
        // Always release the output file, even when serialization fails.
        mdstream.close();
    }
}
Aggregations