use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
the class LookupTopic method main.
/**
 * Command-line entry point for {@code LookupTopic}.
 *
 * <p>Parses the arguments into an {@code Args} instance, logs the index, query and
 * hit count, then runs one search and closes the lookup. On a parse failure the
 * error, the usage text and an example invocation are printed to stderr and the
 * method returns without searching.
 *
 * @param args raw command-line arguments handed to args4j
 * @throws Exception whatever the lookup's constructor, {@code search} or {@code close} throws
 */
public static void main(String[] args) throws Exception {
  Args searchArgs = new Args();
  CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    // The example must name this class; it previously printed LookupNode.
    System.err.println("Example: " + LookupTopic.class.getSimpleName() + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info(String.format("Index: %s", searchArgs.index));
  LOG.info(String.format("Query: %s", searchArgs.query));
  LOG.info(String.format("Hits: %s", searchArgs.numHits));

  LookupTopic lookup = new LookupTopic(searchArgs.index);
  try {
    lookup.search(searchArgs.query, searchArgs.numHits);
  } finally {
    // Release the lookup even when the search fails.
    lookup.close();
  }
}
use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
the class FeatureExtractorCli method main.
/**
 * Command-line entry point for feature extraction.
 *
 * <p>Opens the index, loads the qrels and (if given) the feature-extractor
 * definitions, reads the topics for the selected collection and prints the
 * extracted features to the output file. Supported collections are
 * {@code gov2}, {@code webxml} and {@code twitter}; any other value only prints
 * an error to stderr.
 *
 * @param args command-line arguments bound onto {@code FeatureExtractionArgs} by args4j
 *             (index directory, qrels file, topics file, output file, collection, extractors)
 * @throws Exception if the index, qrels, topics or output file cannot be opened or processed
 */
public static void main(String[] args) throws Exception {
  FeatureExtractionArgs parsedArgs = new FeatureExtractionArgs();
  CmdLineParser parser = new CmdLineParser(parsedArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    return;
  }

  // try-with-resources: the directory and reader were previously never closed.
  try (Directory indexDirectory = FSDirectory.open(Paths.get(parsedArgs.indexDir));
      IndexReader reader = DirectoryReader.open(indexDirectory)) {
    Qrels qrels = new Qrels(parsedArgs.qrelFile);
    FeatureExtractors extractors = null;
    if (parsedArgs.extractors != null) {
      extractors = FeatureExtractors.loadExtractor(parsedArgs.extractors);
    }

    // The output stream is opened only after qrels/extractors loaded, as before,
    // and is now closed on every exit path.
    try (PrintStream out = new PrintStream(new FileOutputStream(new File(parsedArgs.outputFile)))) {
      if (parsedArgs.collection.equals("gov2") || parsedArgs.collection.equals("webxml")) {
        // Pick the topic reader implementation by collection and load it reflectively.
        String className = parsedArgs.collection.equals("gov2") ? "Trec" : "Webxml";
        TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + className + "TopicReader")
            .getConstructor(Path.class)
            .newInstance(Paths.get(parsedArgs.topicsFile));
        SortedMap<Integer, String> topics = tr.read();
        LOG.debug(String.format("%d topics found", topics.size()));
        WebFeatureExtractor extractor = new WebFeatureExtractor(reader, qrels, convertTopicsFormat(topics), extractors);
        extractor.printFeatures(out);
      } else if (parsedArgs.collection.equals("twitter")) {
        Map<String, String> topics = MicroblogTopicSet.fromFile(new File(parsedArgs.topicsFile)).toMap();
        LOG.debug(String.format("%d topics found", topics.size()));
        TwitterFeatureExtractor extractor = new TwitterFeatureExtractor(reader, qrels, topics, extractors);
        extractor.printFeatures(out);
      } else {
        System.err.println("Unrecognized collection " + parsedArgs.collection);
      }
    }
  }
}
use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
the class SearchTweets method main.
/**
 * Command-line entry point for running microblog topics against a tweet index.
 *
 * <p>Parses the arguments, opens the index (memory-mapped with preload when
 * {@code inmem} is set), selects the similarity (QL or BM25), builds the reranker
 * cascade, then runs every topic and writes one TREC-style line per returned
 * document to the output file. Per-query and aggregate timings are logged.
 *
 * <p>If neither scoring model is selected the process exits with status -1.
 *
 * @param args command-line arguments bound onto {@code SearchArgs} by args4j
 * @throws Exception if the index, topics, qrels or output files cannot be opened, or a search fails
 */
public static void main(String[] args) throws Exception {
  long initializationTime = System.currentTimeMillis();
  SearchArgs searchArgs = new SearchArgs();
  CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    System.err.println("Example: SearchTweets" + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info("Reading index at " + searchArgs.index);
  Directory dir;
  if (searchArgs.inmem) {
    LOG.info("Using MMapDirectory with preload");
    dir = new MMapDirectory(Paths.get(searchArgs.index));
    ((MMapDirectory) dir).setPreload(true);
  } else {
    LOG.info("Using default FSDirectory");
    dir = FSDirectory.open(Paths.get(searchArgs.index));
  }

  // Feature dump stream is optional; tracked here so it is closed on every exit path.
  PrintStream featureOut = null;
  try (Directory indexDir = dir; IndexReader reader = DirectoryReader.open(indexDir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    if (searchArgs.ql) {
      LOG.info("Using QL scoring model");
      searcher.setSimilarity(new LMDirichletSimilarity(searchArgs.mu));
    } else if (searchArgs.bm25) {
      LOG.info("Using BM25 scoring model");
      searcher.setSimilarity(new BM25Similarity(searchArgs.k1, searchArgs.b));
    } else {
      LOG.error("Error: Must specify scoring model!");
      System.exit(-1);
    }

    RerankerCascade cascade = new RerankerCascade();
    EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
    if (searchArgs.rm3) {
      cascade.add(new Rm3Reranker(englishAnalyzer, FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.twitter.txt"));
    }
    // The retweet-removal/tiebreak stage is always part of the cascade.
    cascade.add(new RemoveRetweetsTemporalTiebreakReranker());

    if (!searchArgs.model.isEmpty() && searchArgs.extractors != null) {
      LOG.debug(String.format("Ranklib model used, modeled loaded from %s", searchArgs.model));
      cascade.add(new RankLibReranker(searchArgs.model, FIELD_BODY, searchArgs.extractors));
    }

    FeatureExtractors extractorChain = null;
    if (searchArgs.extractors != null) {
      extractorChain = FeatureExtractors.loadExtractor(searchArgs.extractors);
    }

    if (searchArgs.dumpFeatures) {
      featureOut = new PrintStream(searchArgs.featureFile);
      Qrels qrels = new Qrels(searchArgs.qrels);
      cascade.add(new TweetsLtrDataGenerator(featureOut, qrels, extractorChain));
    }

    MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(searchArgs.topics));

    try (PrintStream out = new PrintStream(new FileOutputStream(new File(searchArgs.output)))) {
      LOG.info("Writing output to " + searchArgs.output);
      LOG.info("Initialized complete! (elapsed time = " + (System.currentTimeMillis() - initializationTime) + "ms)");

      long totalTime = 0;
      int cnt = 0;
      for (MicroblogTopic topic : topics) {
        long curQueryTime = System.currentTimeMillis();

        // Do not consider tweets whose ids are beyond the queryTweetTime:
        // the <querytweettime> tag contains the timestamp of the query in terms of the
        // chronologically nearest tweet id within the corpus.
        Query filter = TermRangeQuery.newStringRange(FIELD_ID, "0", String.valueOf(topic.getQueryTweetTime()), true, true);
        Query query = AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, englishAnalyzer, topic.getQuery());

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(filter, BooleanClause.Occur.FILTER);
        builder.add(query, BooleanClause.Occur.MUST);
        Query q = builder.build();

        TopDocs rs = searcher.search(q, searchArgs.hits);
        List<String> queryTokens = AnalyzerUtils.tokenize(englishAnalyzer, topic.getQuery());
        RerankerContext context = new RerankerContext(searcher, query, topic.getId(), topic.getQuery(), queryTokens, FIELD_BODY, filter);
        ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
        long queryTime = (System.currentTimeMillis() - curQueryTime);

        // The run-file query id is the topic id without its "MB" prefix and leading
        // zeros; it is the same for every document, so compute it once per topic.
        String qid = topic.getId().replaceFirst("^MB0*", "");
        for (int i = 0; i < docs.documents.length; i++) {
          out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(FIELD_ID).stringValue(), (i + 1), docs.scores[i], searchArgs.runtag));
        }

        LOG.info("Query " + topic.getId() + " (elapsed time = " + queryTime + "ms)");
        totalTime += queryTime;
        cnt++;
      }

      LOG.info("All queries completed!");
      LOG.info("Total elapsed time = " + totalTime + "ms");
      // Guard against an empty topic set, which previously caused a division by zero.
      long averageLatency = cnt > 0 ? totalTime / cnt : 0;
      LOG.info("Average query latency = " + averageLatency + "ms");
    }
  } finally {
    if (featureOut != null) {
      featureOut.close();
    }
  }
}
use of org.kohsuke.args4j.CmdLineException in project newts by OpenNMS.
the class ImportRunner method execute.
/**
 * Parses the command line and runs the import, blocking until it terminates.
 *
 * <p>Files under {@code m_source} are walked, turned into samples, grouped into
 * batches of {@code m_samplesPerBatch} and posted either over REST (when
 * {@code m_restUrl} is set) or directly. Progress metrics are printed to stderr
 * every 10 seconds; for direct imports they are additionally reported through a
 * {@code NewtsReporter} every second.
 *
 * <p>On successful completion the JVM is terminated with status 0. On failure the
 * error is printed, the subscription is cancelled and this method returns.
 *
 * @param args raw command-line arguments handed to args4j
 * @throws Exception if setting up the pipeline fails or the wait is interrupted
 */
public void execute(String... args) throws Exception {
  CmdLineParser parser = new CmdLineParser(this);
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    // handling of wrong arguments
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    return;
  }

  // Set up the console metrics reporter (writes to stderr)
  MetricRegistry metrics = new MetricRegistry();
  final long start = System.currentTimeMillis();
  metrics.register("elapsed-seconds", new Gauge<Double>() {
    @Override
    public Double getValue() {
      return (System.currentTimeMillis() - start) / 1000.0;
    }
  });
  final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics)
      .outputTo(System.err)
      .convertRatesTo(SECONDS)
      .convertDurationsTo(MILLISECONDS)
      .build();
  reporter.start(10, SECONDS);

  if (m_restUrl == null) {
    // we are using a direct importer so use a NewtsReporter for storing metrics
    NewtsReporter newtsReporter = NewtsReporter.forRegistry(metrics)
        .name("importer")
        .convertRatesTo(SECONDS)
        .convertDurationsTo(MILLISECONDS)
        .build(repository());
    newtsReporter.start(1, SECONDS);
  }

  LOG.debug("Scanning {} for GSOD data files...", m_source);

  // walk the files in the directory given
  Observable<Sample> samples = fileTreeWalker(m_source.toPath())
      .subscribeOn(Schedulers.io())
      .map(meter(metrics.meter("files"), Path.class))
      .map(reportFile())
      .mergeMap(lines())
      .filter(exclude("YEARMODA"))
      .mergeMap(samples())
      .map(adjustTime())
      .map(meter(metrics.meter("samples"), Sample.class));

  Observable<List<Sample>> batches = samples.buffer(m_samplesPerBatch);

  Observable<Boolean> doImport = m_restUrl != null ? restPoster(batches, metrics) : directPoster(batches, metrics);

  System.err.println("doImport = " + doImport);

  // GO!!!
  final AtomicReference<Subscription> subscription = new AtomicReference<>();
  final AtomicBoolean failed = new AtomicBoolean(false);
  // Released on either terminal event so the caller never waits forever.
  final CountDownLatch latch = new CountDownLatch(1);

  Subscription s = doImport.subscribe(new Observer<Boolean>() {

    @Override
    public void onCompleted() {
      System.err.println("Finished Importing Everything!");
      reporter.report();
      latch.countDown();
      System.exit(0);
    }

    @Override
    public void onError(Throwable e) {
      failed.set(true);
      System.err.println("Error importing!");
      e.printStackTrace();
      try {
        // May still be null if the error arrives before subscribe() has returned;
        // that case is handled after subscribe() below.
        Subscription s = subscription.get();
        if (s != null)
          s.unsubscribe();
      } catch (Exception ex) {
        System.err.println("Failed to close httpClient!");
        ex.printStackTrace();
      } finally {
        // Previously the latch was only released in onCompleted(), so a failed
        // import left execute() blocked in latch.await() indefinitely.
        latch.countDown();
      }
    }

    @Override
    public void onNext(Boolean t) {
      System.err.println("Received a boolen: " + t);
    }
  });

  subscription.set(s);
  // Covers an error delivered before the subscription reference was published.
  if (failed.get()) {
    s.unsubscribe();
  }

  System.err.println("Return from Subscribe!");

  latch.await();
}
use of org.kohsuke.args4j.CmdLineException in project ORCID-Source by ORCID.
the class CheckAndFixContributorNameVisibility method main.
/**
 * Command-line entry point: binds the arguments onto a new
 * {@code CheckAndFixContributorNameVisibility}, validates them, then initialises
 * and runs the tool.
 *
 * <p>If args4j reports a {@code CmdLineException} at any of those steps, the
 * message and the usage text are written to stderr and the method returns normally.
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the signature; propagated from the steps above
 */
public static void main(String[] args) throws IOException {
  final CheckAndFixContributorNameVisibility tool = new CheckAndFixContributorNameVisibility();
  final CmdLineParser cmdLine = new CmdLineParser(tool);

  try {
    // Bind and validate the options first, then do the actual work.
    cmdLine.parseArgument(args);
    tool.validateArgs(cmdLine);

    tool.init();
    tool.execute();
  } catch (CmdLineException invalidArguments) {
    // Bad invocation: explain what went wrong and how to call the tool.
    System.err.println(invalidArguments.getMessage());
    cmdLine.printUsage(System.err);
  }
}
Aggregations