Usage of com.twitter.elephantbird.mapreduce.input.LuceneHdfsDirectory in the elephant-bird project by Twitter:
the execute method of the HdfsMergeTool class.
/**
 * Merges the Lucene indexes listed in the job configuration into a single
 * segment on local disk, then copies the merged index up to HDFS.
 *
 * <p>The merge is performed against a local temp directory rather than
 * directly on HDFS; the source indexes are read through
 * {@link LuceneHdfsDirectory} and the finished index is uploaded in one
 * {@code FileUtil.copy} call (which also deletes the local copy on success).
 *
 * @param context mapper context; only used to obtain the job {@link Configuration}
 * @throws IOException if the merge fails or the merged index cannot be
 *         copied to the output path
 */
@Override
public void execute(Mapper.Context context) throws IOException {
  Configuration conf = HadoopCompat.getConfiguration(context);
  List<String> indexes = HadoopUtils.readStringListFromConfAsBase64(INDEXES_KEY, conf);
  Path output = new Path(conf.get(OUTPUT_KEY));
  int maxMergeFactor = conf.getInt(MAX_MERGE_FACTOR_KEY, -1);
  // Validate configuration before allocating any local resources.
  Preconditions.checkArgument(maxMergeFactor > 0,
      "A positive max merge factor must be set in the configuration");

  File tmpDirFile = Files.createTempDir();
  boolean copied = false;
  try {
    Directory directory = new SimpleFSDirectory(tmpDirFile, NoLockFactory.getNoLockFactory());
    IndexWriter writer = LuceneIndexOutputFormat.createIndexWriter(
        directory, new LuceneIndexOutputFormat.NeverTokenizeAnalyzer(), maxMergeFactor);
    try {
      Directory[] dirs = new Directory[indexes.size()];
      int dir = 0;
      for (String index : indexes) {
        dirs[dir++] = new LuceneHdfsDirectory(index, FileSystem.get(conf));
      }
      LOG.info("Adding indexes: " + indexes);
      writer.addIndexes(dirs);
      LOG.info("Force mergeing...");
      // Collapse everything into a single segment so the output is one
      // self-contained index.
      writer.forceMerge(1);
      LOG.info("Closing writer...");
    } finally {
      // close() commits the merged segment; it must run even on failure so
      // the writer's resources (file handles, lock) are released.
      writer.close();
    }
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Copying index to HDFS...");
    // deleteSource=true: FileUtil.copy removes the local temp dir on success.
    if (!FileUtil.copy(tmpDirFile, fs, output, true, conf)) {
      throw new IOException("Failed to copy local index to HDFS!");
    }
    copied = true;
    LOG.info("Index written to: " + output);
  } finally {
    if (!copied) {
      // Don't leak the local temp directory when the merge or the copy fails.
      FileUtil.fullyDelete(tmpDirFile);
    }
  }
}
Aggregations