
Example 51 with CmdLineException

Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

The class LookupTopic, method main:

public static void main(String[] args) throws Exception {
    Args searchArgs = new Args();
    CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        System.err.println("Example: " + LookupNode.class.getSimpleName() + parser.printExample(OptionHandlerFilter.REQUIRED));
        return;
    }
    LOG.info(String.format("Index: %s", searchArgs.index));
    LOG.info(String.format("Query: %s", searchArgs.query));
    LOG.info(String.format("Hits: %s", searchArgs.numHits));
    LookupTopic lookup = new LookupTopic(searchArgs.index);
    lookup.search(searchArgs.query, searchArgs.numHits);
    lookup.close();
}
Also used: CmdLineParser (org.kohsuke.args4j.CmdLineParser), CmdLineException (org.kohsuke.args4j.CmdLineException)
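
The Args object passed to CmdLineParser above is an args4j options bean defined elsewhere in Anserini and not shown on this page. As a minimal sketch of the pattern these examples rely on (the field and option names below are illustrative, not Anserini's actual ones), a bean with a required @Option makes parseArgument throw CmdLineException whenever that option is missing or malformed:

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.ParserProperties;

public class LookupArgsSketch {

    // Hypothetical options bean; Anserini's real Args class defines its own fields.
    public static class Args {
        @Option(name = "-index", metaVar = "[path]", required = true, usage = "index path")
        String index;

        @Option(name = "-query", metaVar = "[string]", required = true, usage = "query text")
        String query;

        @Option(name = "-hits", metaVar = "[number]", usage = "number of hits to return")
        int numHits = 20;
    }

    public static void main(String[] args) {
        Args parsedArgs = new Args();
        CmdLineParser parser = new CmdLineParser(parsedArgs, ParserProperties.defaults().withUsageWidth(90));
        try {
            // Throws CmdLineException if -index or -query is missing or a value is malformed.
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
            return;
        }
        System.out.printf("index=%s query=%s hits=%d%n", parsedArgs.index, parsedArgs.query, parsedArgs.numHits);
    }
}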

Example 52 with CmdLineException

Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

The class FeatureExtractorCli, method main:

/**
 * Requires the user to supply the index directory as well as the qrels, topics, and output files.
 * @param args  indexDir, qrelFile, topicFile, outputFile
 */
public static void main(String[] args) throws Exception {
    long curTime = System.nanoTime();
    FeatureExtractionArgs parsedArgs = new FeatureExtractionArgs();
    CmdLineParser parser = new CmdLineParser(parsedArgs, ParserProperties.defaults().withUsageWidth(90));
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }
    Directory indexDirectory = FSDirectory.open(Paths.get(parsedArgs.indexDir));
    IndexReader reader = DirectoryReader.open(indexDirectory);
    Qrels qrels = new Qrels(parsedArgs.qrelFile);
    FeatureExtractors extractors = null;
    if (parsedArgs.extractors != null) {
        extractors = FeatureExtractors.loadExtractor(parsedArgs.extractors);
    }
    // Query parser needed to construct the query object for feature extraction in the loop
    PrintStream out = new PrintStream(new FileOutputStream(new File(parsedArgs.outputFile)));
    if (parsedArgs.collection.equals("gov2") || parsedArgs.collection.equals("webxml")) {
        // Open the topics file and read it
        String className = parsedArgs.collection.equals("gov2") ? "Trec" : "Webxml";
        TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + className + "TopicReader").getConstructor(Path.class).newInstance(Paths.get(parsedArgs.topicsFile));
        SortedMap<Integer, String> topics = tr.read();
        LOG.debug(String.format("%d topics found", topics.size()));
        WebFeatureExtractor extractor = new WebFeatureExtractor(reader, qrels, convertTopicsFormat(topics), extractors);
        extractor.printFeatures(out);
    } else if (parsedArgs.collection.equals("twitter")) {
        Map<String, String> topics = MicroblogTopicSet.fromFile(new File(parsedArgs.topicsFile)).toMap();
        LOG.debug(String.format("%d topics found", topics.size()));
        TwitterFeatureExtractor extractor = new TwitterFeatureExtractor(reader, qrels, topics, extractors);
        extractor.printFeatures(out);
    } else {
        System.err.println("Unrecognized collection " + parsedArgs.collection);
    }
}
Also used: Qrels (io.anserini.util.Qrels), PrintStream (java.io.PrintStream), CmdLineParser (org.kohsuke.args4j.CmdLineParser), FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors), TopicReader (io.anserini.search.query.TopicReader), FileOutputStream (java.io.FileOutputStream), IndexReader (org.apache.lucene.index.IndexReader), File (java.io.File), HashMap (java.util.HashMap), Map (java.util.Map), SortedMap (java.util.SortedMap), CmdLineException (org.kohsuke.args4j.CmdLineException), Directory (org.apache.lucene.store.Directory), FSDirectory (org.apache.lucene.store.FSDirectory)

Example 53 with CmdLineException

Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

The class SearchTweets, method main:

public static void main(String[] args) throws Exception {
    long initializationTime = System.currentTimeMillis();
    SearchArgs searchArgs = new SearchArgs();
    CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        System.err.println("Example: SearchTweets" + parser.printExample(OptionHandlerFilter.REQUIRED));
        return;
    }
    LOG.info("Reading index at " + searchArgs.index);
    Directory dir;
    if (searchArgs.inmem) {
        LOG.info("Using MMapDirectory with preload");
        dir = new MMapDirectory(Paths.get(searchArgs.index));
        ((MMapDirectory) dir).setPreload(true);
    } else {
        LOG.info("Using default FSDirectory");
        dir = FSDirectory.open(Paths.get(searchArgs.index));
    }
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    if (searchArgs.ql) {
        LOG.info("Using QL scoring model");
        searcher.setSimilarity(new LMDirichletSimilarity(searchArgs.mu));
    } else if (searchArgs.bm25) {
        LOG.info("Using BM25 scoring model");
        searcher.setSimilarity(new BM25Similarity(searchArgs.k1, searchArgs.b));
    } else {
        LOG.error("Error: Must specify scoring model!");
        System.exit(-1);
    }
    RerankerCascade cascade = new RerankerCascade();
    EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
    if (searchArgs.rm3) {
        cascade.add(new Rm3Reranker(englishAnalyzer, FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.twitter.txt"));
        cascade.add(new RemoveRetweetsTemporalTiebreakReranker());
    } else {
        cascade.add(new RemoveRetweetsTemporalTiebreakReranker());
    }
    if (!searchArgs.model.isEmpty() && searchArgs.extractors != null) {
        LOG.debug(String.format("Ranklib model used, modeled loaded from %s", searchArgs.model));
        cascade.add(new RankLibReranker(searchArgs.model, FIELD_BODY, searchArgs.extractors));
    }
    FeatureExtractors extractorChain = null;
    if (searchArgs.extractors != null) {
        extractorChain = FeatureExtractors.loadExtractor(searchArgs.extractors);
    }
    if (searchArgs.dumpFeatures) {
        PrintStream out = new PrintStream(searchArgs.featureFile);
        Qrels qrels = new Qrels(searchArgs.qrels);
        cascade.add(new TweetsLtrDataGenerator(out, qrels, extractorChain));
    }
    MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(searchArgs.topics));
    PrintStream out = new PrintStream(new FileOutputStream(new File(searchArgs.output)));
    LOG.info("Writing output to " + searchArgs.output);
    LOG.info("Initialized complete! (elapsed time = " + (System.currentTimeMillis() - initializationTime) + "ms)");
    long totalTime = 0;
    int cnt = 0;
    for (MicroblogTopic topic : topics) {
        long curQueryTime = System.currentTimeMillis();
        // do not consider tweets with tweet ids beyond the queryTweetTime;
        // <querytweettime> tag contains the timestamp of the query in terms of the
        // chronologically nearest tweet id within the corpus
        Query filter = TermRangeQuery.newStringRange(FIELD_ID, "0", String.valueOf(topic.getQueryTweetTime()), true, true);
        Query query = AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, englishAnalyzer, topic.getQuery());
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(filter, BooleanClause.Occur.FILTER);
        builder.add(query, BooleanClause.Occur.MUST);
        Query q = builder.build();
        TopDocs rs = searcher.search(q, searchArgs.hits);
        List<String> queryTokens = AnalyzerUtils.tokenize(englishAnalyzer, topic.getQuery());
        RerankerContext context = new RerankerContext(searcher, query, topic.getId(), topic.getQuery(), queryTokens, FIELD_BODY, filter);
        ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
        long queryTime = (System.currentTimeMillis() - curQueryTime);
        for (int i = 0; i < docs.documents.length; i++) {
            String qid = topic.getId().replaceFirst("^MB0*", "");
            out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(FIELD_ID).stringValue(), (i + 1), docs.scores[i], searchArgs.runtag));
        }
        LOG.info("Query " + topic.getId() + " (elapsed time = " + queryTime + "ms)");
        totalTime += queryTime;
        cnt++;
    }
    LOG.info("All queries completed!");
    LOG.info("Total elapsed time = " + totalTime + "ms");
    LOG.info("Average query latency = " + (totalTime / cnt) + "ms");
    reader.close();
    out.close();
}
Also used: RemoveRetweetsTemporalTiebreakReranker (io.anserini.rerank.twitter.RemoveRetweetsTemporalTiebreakReranker), ScoredDocuments (io.anserini.rerank.ScoredDocuments), RerankerCascade (io.anserini.rerank.RerankerCascade), Rm3Reranker (io.anserini.rerank.rm3.Rm3Reranker), RankLibReranker (io.anserini.rerank.RankLibReranker), MMapDirectory (org.apache.lucene.store.MMapDirectory), Directory (org.apache.lucene.store.Directory), FSDirectory (org.apache.lucene.store.FSDirectory), PrintStream (java.io.PrintStream), Qrels (io.anserini.util.Qrels), CmdLineParser (org.kohsuke.args4j.CmdLineParser), EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer), FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors), TweetsLtrDataGenerator (io.anserini.ltr.TweetsLtrDataGenerator), FileOutputStream (java.io.FileOutputStream), IndexReader (org.apache.lucene.index.IndexReader), BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity), LMDirichletSimilarity (org.apache.lucene.search.similarities.LMDirichletSimilarity), File (java.io.File), CmdLineException (org.kohsuke.args4j.CmdLineException), RerankerContext (io.anserini.rerank.RerankerContext)
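
The out.println call inside the topic loop writes one line per retrieved tweet in the standard TREC run format: query id, the literal Q0, document id, rank, score, and run tag. With purely illustrative values, a line of the output file looks like:

161 Q0 34952194402811904 1 12.345600 myRunTag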

Example 54 with CmdLineException

Use of org.kohsuke.args4j.CmdLineException in project newts by OpenNMS.

The class ImportRunner, method execute:

public void execute(String... args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        // handling of wrong arguments
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }
    // Setup the slf4j metrics reporter
    MetricRegistry metrics = new MetricRegistry();
    final long start = System.currentTimeMillis();
    metrics.register("elapsed-seconds", new Gauge<Double>() {

        @Override
        public Double getValue() {
            return (System.currentTimeMillis() - start) / 1000.0;
        }
    });
    final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).outputTo(System.err).convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build();
    reporter.start(10, SECONDS);
    if (m_restUrl == null) {
        // we are using a direct importer so use a NewtsReporter for storing metrics
        NewtsReporter newtsReporter = NewtsReporter.forRegistry(metrics).name("importer").convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build(repository());
        newtsReporter.start(1, SECONDS);
    }
    LOG.debug("Scanning {} for GSOD data files...", m_source);
    // walk the files in the directory given
    Observable<Sample> samples = fileTreeWalker(m_source.toPath()).subscribeOn(Schedulers.io()).map(meter(metrics.meter("files"), Path.class)).map(reportFile()).mergeMap(lines()).filter(exclude("YEARMODA")).mergeMap(samples()).map(adjustTime()).map(meter(metrics.meter("samples"), Sample.class));
    Observable<List<Sample>> batches = samples.buffer(m_samplesPerBatch);
    Observable<Boolean> doImport = m_restUrl != null ? restPoster(batches, metrics) : directPoster(batches, metrics);
    System.err.println("doImport = " + doImport);
    // GO!!!
    final AtomicReference<Subscription> subscription = new AtomicReference<>();
    final AtomicBoolean failed = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);
    Subscription s = doImport.subscribe(new Observer<Boolean>() {

        @Override
        public void onCompleted() {
            System.err.println("Finished Importing Everything!");
            reporter.report();
            latch.countDown();
            System.exit(0);
        }

        @Override
        public void onError(Throwable e) {
            failed.set(true);
            System.err.println("Error importing!");
            e.printStackTrace();
            try {
                // latch.await();
                Subscription s = subscription.get();
                if (s != null)
                    s.unsubscribe();
            } catch (Exception ex) {
                System.err.println("Failed to close httpClient!");
                ex.printStackTrace();
            } finally {
            // dumpThreads();
            }
        }

        @Override
        public void onNext(Boolean t) {
            System.err.println("Received a boolen: " + t);
        }
    });
    subscription.set(s);
    if (failed.get()) {
        s.unsubscribe();
    }
    // latch.countDown();
    System.err.println("Return from Subscribe!");
    latch.await();
// dumpThreads();
}
Also used: Path (java.nio.file.Path), CmdLineParser (org.kohsuke.args4j.CmdLineParser), ConsoleReporter (com.codahale.metrics.ConsoleReporter), Sample (org.opennms.newts.api.Sample), MetricRegistry (com.codahale.metrics.MetricRegistry), AtomicReference (java.util.concurrent.atomic.AtomicReference), CountDownLatch (java.util.concurrent.CountDownLatch), ParseException (java.text.ParseException), CmdLineException (org.kohsuke.args4j.CmdLineException), UnsupportedEncodingException (java.io.UnsupportedEncodingException), IOException (java.io.IOException), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), List (java.util.List), NewtsReporter (org.opennms.newts.reporter.metrics.NewtsReporter), Subscription (rx.Subscription)
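
Unlike the Anserini examples, ImportRunner builds the parser over itself (new CmdLineParser(this)), so the command object doubles as the args4j options holder; fields such as m_restUrl, m_source, and m_samplesPerBatch are annotated as options elsewhere in the class. A minimal sketch of that self-parsing idiom, with hypothetical option names and defaults:

import java.io.File;

import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

public class SelfParsingCommand {

    // Hypothetical options; the real ImportRunner defines its own flags and defaults.
    @Option(name = "--batch-size", usage = "samples per batch")
    private int m_samplesPerBatch = 1000;

    @Option(name = "--rest-url", usage = "post batches to this REST endpoint instead of writing directly")
    private String m_restUrl;

    @Argument(metaVar = "SOURCE", required = true, usage = "directory to scan for input files")
    private File m_source;

    public void execute(String... args) throws Exception {
        // The command object itself carries the annotated fields.
        CmdLineParser parser = new CmdLineParser(this);
        try {
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            // handling of wrong arguments
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
            return;
        }
        System.out.printf("source=%s batch=%d rest=%s%n", m_source, m_samplesPerBatch, m_restUrl);
    }

    public static void main(String[] args) throws Exception {
        new SelfParsingCommand().execute(args);
    }
}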

Example 55 with CmdLineException

Use of org.kohsuke.args4j.CmdLineException in project ORCID-Source by ORCID.

The class CheckAndFixContributorNameVisibility, method main:

public static void main(String[] args) throws IOException {
    CheckAndFixContributorNameVisibility fixer = new CheckAndFixContributorNameVisibility();
    CmdLineParser parser = new CmdLineParser(fixer);
    try {
        parser.parseArgument(args);
        fixer.validateArgs(parser);
        fixer.init();
        fixer.execute();
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
    }
}
Also used: CmdLineParser (org.kohsuke.args4j.CmdLineParser), CmdLineException (org.kohsuke.args4j.CmdLineException)
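
Here the catch block covers not only parseArgument but also fixer.validateArgs(parser), i.e. post-parse validation that can itself raise CmdLineException. A minimal sketch of that style of validation, assuming a hypothetical required option (note that the CmdLineException(CmdLineParser, String) constructor used below is deprecated in recent args4j releases in favour of the Localizable-based overloads):

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

public class ValidatingFixerSketch {

    // Hypothetical option; the real class defines its own arguments.
    @Option(name = "-o", usage = "ORCID of the record to check")
    private String orcid;

    // Post-parse validation in the same style as validateArgs(parser) above.
    private void validateArgs(CmdLineParser parser) throws CmdLineException {
        if (orcid == null || orcid.isEmpty()) {
            throw new CmdLineException(parser, "-o is required");
        }
    }

    public static void main(String[] args) {
        ValidatingFixerSketch fixer = new ValidatingFixerSketch();
        CmdLineParser parser = new CmdLineParser(fixer);
        try {
            parser.parseArgument(args);
            fixer.validateArgs(parser);
            System.out.println("Checking record " + fixer.orcid);
        } catch (CmdLineException e) {
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
        }
    }
}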

Aggregations

CmdLineException (org.kohsuke.args4j.CmdLineException): 105
CmdLineParser (org.kohsuke.args4j.CmdLineParser): 80
IOException (java.io.IOException): 16
File (java.io.File): 14
ArrayList (java.util.ArrayList): 11
PrintStream (java.io.PrintStream): 7
StringWriter (java.io.StringWriter): 6
List (java.util.List): 5
FileOutputStream (java.io.FileOutputStream): 4
Path (java.nio.file.Path): 4
CmdLineParser (com.google.gerrit.util.cli.CmdLineParser): 3
FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors): 3
Qrels (io.anserini.util.Qrels): 3
Directory (org.apache.lucene.store.Directory): 3
FSDirectory (org.apache.lucene.store.FSDirectory): 3
ConsoleReporter (com.codahale.metrics.ConsoleReporter): 2
MetricRegistry (com.codahale.metrics.MetricRegistry): 2
Project (com.google.gerrit.reviewdb.client.Project): 2
OrmException (com.google.gwtorm.server.OrmException): 2
Hudson (hudson.model.Hudson): 2