use of org.kohsuke.args4j.CmdLineParser in project Anserini by castorini.
the class SearchWebCollection method main.
/**
 * Command-line entry point: parses {@link SearchArgs}, selects the scoring model, builds the
 * reranker cascade, reads the topics file, runs the search and logs the elapsed time.
 *
 * <p>If the arguments cannot be parsed, the error, usage and an example invocation are printed to
 * stderr and the method returns. If neither the QL nor the BM25 option is set, an error is logged
 * and the JVM exits with status -1.
 *
 * @param args command-line arguments, bound onto a {@link SearchArgs} instance by args4j
 * @throws IllegalArgumentException if the topics file does not exist, is not a regular file, or
 *     is not readable
 * @throws Exception if opening the index, loading the topic reader, or searching fails
 */
public static void main(String[] args) throws Exception {
  SearchArgs searchArgs = new SearchArgs();
  CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    System.err.println("Example: SearchWebCollection" + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info("Reading index at " + searchArgs.index);
  // NOTE(review): this Directory is not passed to the searcher below (which is constructed from
  // the index path); it is kept so the existing open/log behavior is unchanged, and is now closed
  // in the finally block instead of being leaked.
  Directory dir;
  if (searchArgs.inmem) {
    LOG.info("Using MMapDirectory with preload");
    MMapDirectory mmapDir = new MMapDirectory(Paths.get(searchArgs.index));
    mmapDir.setPreload(true);
    dir = mmapDir;
  } else {
    LOG.info("Using default FSDirectory");
    dir = FSDirectory.open(Paths.get(searchArgs.index));
  }

  // Feature dump stream; only opened when dumpFeatures is set, and must stay open until the
  // search has finished because the cascade writes to it.
  PrintStream featureOut = null;
  try {
    Similarity similarity = null;
    if (searchArgs.ql) {
      LOG.info("Using QL scoring model");
      similarity = new LMDirichletSimilarity(searchArgs.mu);
    } else if (searchArgs.bm25) {
      LOG.info("Using BM25 scoring model");
      similarity = new BM25Similarity(searchArgs.k1, searchArgs.b);
    } else {
      LOG.error("Error: Must specify scoring model!");
      System.exit(-1);
    }

    RerankerCascade cascade = new RerankerCascade();
    boolean useQueryParser = false;
    if (searchArgs.rm3) {
      cascade.add(new Rm3Reranker(new EnglishAnalyzer(), FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.gov2.txt"));
      useQueryParser = true;
    } else {
      cascade.add(new IdentityReranker());
    }

    FeatureExtractors extractors = null;
    if (searchArgs.extractors != null) {
      extractors = FeatureExtractors.loadExtractor(searchArgs.extractors);
    }

    if (searchArgs.dumpFeatures) {
      featureOut = new PrintStream(searchArgs.featureFile);
      Qrels qrels = new Qrels(searchArgs.qrels);
      cascade.add(new WebCollectionLtrDataGenerator(featureOut, qrels, extractors));
    }

    Path topicsFile = Paths.get(searchArgs.topics);
    if (!Files.exists(topicsFile) || !Files.isRegularFile(topicsFile) || !Files.isReadable(topicsFile)) {
      throw new IllegalArgumentException("Topics file : " + topicsFile + " does not exist or is not a (readable) file.");
    }

    // The topic reader class is resolved reflectively from the topicReader option and must expose
    // a constructor taking a single Path.
    TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + searchArgs.topicReader + "TopicReader").getConstructor(Path.class).newInstance(topicsFile);
    SortedMap<Integer, String> topics = tr.read();

    final long start = System.nanoTime();
    SearchWebCollection searcher = new SearchWebCollection(searchArgs.index);
    try {
      searcher.search(topics, searchArgs.output, similarity, searchArgs.hits, cascade, useQueryParser, searchArgs.keepstop);
    } finally {
      // Close the searcher even when the search fails.
      searcher.close();
    }
    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
    LOG.info("Total " + topics.size() + " topics searched in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
  } finally {
    if (featureOut != null) {
      featureOut.close();
    }
    dir.close();
  }
}
use of org.kohsuke.args4j.CmdLineParser in project indy by Commonjava.
the class BootOptions method parseArgs.
/**
 * Binds the given command-line arguments onto this options object.
 *
 * @param args raw command-line arguments
 * @return {@code false} when help was requested (usage is printed in that case), {@code true}
 *     otherwise
 * @throws IndyBootException if args4j rejects the arguments
 */
public boolean parseArgs(final String[] args) throws IndyBootException {
  final CmdLineParser cliParser = new CmdLineParser(this);
  try {
    cliParser.parseArgument(args);
  } catch (final CmdLineException parseFailure) {
    throw new IndyBootException("Failed to parse command-line args: %s", parseFailure, parseFailure.getMessage());
  }

  if (!isHelp()) {
    return true;
  }

  // Help requested: show usage and signal that the caller should not start.
  printUsage(cliParser, null);
  return false;
}
use of org.kohsuke.args4j.CmdLineParser in project fess by codelibs.
the class SuggestCreator method main.
/**
 * Command-line entry point for the suggest creator.
 *
 * <p>Parses the options, forwards the Fess Elasticsearch system properties to the
 * {@code EsClient} property names when they are set, initializes the LaContainer, runs
 * {@code process(options)} and terminates the JVM with the resulting exit code. If the arguments
 * cannot be parsed, the error and usage are printed to stderr and the method returns without
 * calling {@link System#exit(int)}.
 *
 * @param args command-line arguments, bound onto an {@code Options} instance by args4j
 */
public static void main(final String[] args) {
  final Options options = new Options();
  final CmdLineParser parser = new CmdLineParser(options);
  try {
    parser.parseArgument(args);
  } catch (final CmdLineException e) {
    System.err.println(e.getMessage());
    // Report this class in the usage line (it previously printed the Crawler class name).
    System.err.println("java " + SuggestCreator.class.getCanonicalName() + " [options...] arguments...");
    parser.printUsage(System.err);
    return;
  }

  if (logger.isDebugEnabled()) {
    try {
      ManagementFactory.getRuntimeMXBean().getInputArguments().forEach(s -> logger.debug("Parameter: " + s));
      System.getProperties().entrySet().forEach(entry -> logger.debug("Property: " + entry.getKey() + "=" + entry.getValue()));
      System.getenv().entrySet().forEach(entry -> logger.debug("Env: " + entry.getKey() + "=" + entry.getValue()));
      logger.debug("Option: " + options);
    } catch (final Exception ignored) {
      // Deliberately ignored: this block only emits diagnostic output, and a failure while
      // dumping the environment should not stop the run.
    }
  }

  final String transportAddresses = System.getProperty(Constants.FESS_ES_TRANSPORT_ADDRESSES);
  if (StringUtil.isNotBlank(transportAddresses)) {
    System.setProperty(EsClient.TRANSPORT_ADDRESSES, transportAddresses);
  }
  final String clusterName = System.getProperty(Constants.FESS_ES_CLUSTER_NAME);
  if (StringUtil.isNotBlank(clusterName)) {
    System.setProperty(EsClient.CLUSTER_NAME, clusterName);
  }

  int exitCode;
  try {
    SingletonLaContainerFactory.setConfigPath("app.xml");
    SingletonLaContainerFactory.setExternalContext(new GenericExternalContext());
    SingletonLaContainerFactory.setExternalContextComponentDefRegister(new GenericExternalContextComponentDefRegister());
    SingletonLaContainerFactory.init();
    // NOTE(review): destroyContainer() is invoked both from this hook and from the finally block
    // below, so it presumably tolerates being called twice -- confirm.
    final Thread shutdownCallback = new Thread("ShutdownHook") {
      @Override
      public void run() {
        if (logger.isDebugEnabled()) {
          logger.debug("Destroying LaContainer..");
        }
        destroyContainer();
      }
    };
    Runtime.getRuntime().addShutdownHook(shutdownCallback);
    exitCode = process(options);
  } catch (final ContainerNotAvailableException e) {
    if (logger.isDebugEnabled()) {
      logger.debug("SuggestCreator is stopped.", e);
    } else if (logger.isInfoEnabled()) {
      logger.info("SuggestCreator is stopped.");
    }
    exitCode = Constants.EXIT_FAIL;
  } catch (final Throwable t) {
    // Top-level boundary: any failure is logged and mapped to the failure exit code.
    logger.error("Suggest creator does not work correctly.", t);
    exitCode = Constants.EXIT_FAIL;
  } finally {
    destroyContainer();
  }

  logger.info("Finished suggestCreator.");
  System.exit(exitCode);
}
use of org.kohsuke.args4j.CmdLineParser in project Anserini by castorini.
the class LookupTopic method main.
/**
 * Command-line entry point: parses the arguments, logs the index, query and hit count, and runs
 * a single lookup against the index.
 *
 * <p>If the arguments cannot be parsed, the error, usage and an example invocation are printed to
 * stderr and the method returns.
 *
 * @param args command-line arguments, bound onto an {@code Args} instance by args4j
 * @throws Exception if opening the index or running the search fails
 */
public static void main(String[] args) throws Exception {
  Args searchArgs = new Args();
  CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    // Name this class in the example (it previously printed LookupNode's simple name).
    System.err.println("Example: " + LookupTopic.class.getSimpleName() + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info(String.format("Index: %s", searchArgs.index));
  LOG.info(String.format("Query: %s", searchArgs.query));
  LOG.info(String.format("Hits: %s", searchArgs.numHits));

  LookupTopic lookup = new LookupTopic(searchArgs.index);
  try {
    lookup.search(searchArgs.query, searchArgs.numHits);
  } finally {
    // Close the lookup even when the search throws.
    lookup.close();
  }
}
use of org.kohsuke.args4j.CmdLineParser in project Anserini by castorini.
the class FeatureExtractorCli method main.
/**
 * Command-line entry point for feature extraction.
 *
 * <p>Opens the index, loads the qrels and (optionally) the feature extractor definitions, reads
 * the topics for the selected collection and prints the extracted features to the output file.
 * The collections handled are {@code gov2}, {@code webxml} and {@code twitter}; for any other
 * value a message is printed to stderr and nothing is extracted.
 *
 * <p>If the arguments cannot be parsed, the error and usage are printed to stderr and the method
 * returns.
 *
 * @param args command-line arguments, bound onto a {@code FeatureExtractionArgs} instance by
 *     args4j (index directory, qrels file, topics file, output file, collection, extractors)
 * @throws Exception if the index, qrels, topics or output file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
  FeatureExtractionArgs parsedArgs = new FeatureExtractionArgs();
  CmdLineParser parser = new CmdLineParser(parsedArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    return;
  }

  // The index directory and reader are closed automatically, even when extraction fails.
  try (Directory indexDirectory = FSDirectory.open(Paths.get(parsedArgs.indexDir));
      IndexReader reader = DirectoryReader.open(indexDirectory)) {
    Qrels qrels = new Qrels(parsedArgs.qrelFile);

    FeatureExtractors extractors = null;
    if (parsedArgs.extractors != null) {
      extractors = FeatureExtractors.loadExtractor(parsedArgs.extractors);
    }

    // Constant-first comparisons so a missing collection falls through to the
    // "Unrecognized collection" branch instead of throwing a NullPointerException.
    final boolean isGov2 = "gov2".equals(parsedArgs.collection);
    final boolean isWebxml = "webxml".equals(parsedArgs.collection);

    try (PrintStream out = new PrintStream(new FileOutputStream(new File(parsedArgs.outputFile)))) {
      if (isGov2 || isWebxml) {
        // Resolve the topic reader class reflectively and read the topics file.
        String className = isGov2 ? "Trec" : "Webxml";
        TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + className + "TopicReader").getConstructor(Path.class).newInstance(Paths.get(parsedArgs.topicsFile));
        SortedMap<Integer, String> topics = tr.read();
        LOG.debug(String.format("%d topics found", topics.size()));
        WebFeatureExtractor extractor = new WebFeatureExtractor(reader, qrels, convertTopicsFormat(topics), extractors);
        extractor.printFeatures(out);
      } else if ("twitter".equals(parsedArgs.collection)) {
        Map<String, String> topics = MicroblogTopicSet.fromFile(new File(parsedArgs.topicsFile)).toMap();
        LOG.debug(String.format("%d topics found", topics.size()));
        TwitterFeatureExtractor extractor = new TwitterFeatureExtractor(reader, qrels, topics, extractors);
        extractor.printFeatures(out);
      } else {
        System.err.println("Unrecognized collection " + parsedArgs.collection);
      }
    }
  }
}
Aggregations