Example 61 with CmdLineParser

use of org.kohsuke.args4j.CmdLineParser in project Anserini by castorini.

the class SearchTweets method main.

public static void main(String[] args) throws Exception {
    long initializationTime = System.currentTimeMillis();
    SearchArgs searchArgs = new SearchArgs();
    CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        System.err.println("Example: SearchTweets" + parser.printExample(OptionHandlerFilter.REQUIRED));
        return;
    }
    LOG.info("Reading index at " + searchArgs.index);
    Directory dir;
    if (searchArgs.inmem) {
        LOG.info("Using MMapDirectory with preload");
        dir = new MMapDirectory(Paths.get(searchArgs.index));
        ((MMapDirectory) dir).setPreload(true);
    } else {
        LOG.info("Using default FSDirectory");
        dir = FSDirectory.open(Paths.get(searchArgs.index));
    }
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    if (searchArgs.ql) {
        LOG.info("Using QL scoring model");
        searcher.setSimilarity(new LMDirichletSimilarity(searchArgs.mu));
    } else if (searchArgs.bm25) {
        LOG.info("Using BM25 scoring model");
        searcher.setSimilarity(new BM25Similarity(searchArgs.k1, searchArgs.b));
    } else {
        LOG.error("Error: Must specify scoring model!");
        System.exit(-1);
    }
    RerankerCascade cascade = new RerankerCascade();
    EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
    if (searchArgs.rm3) {
        cascade.add(new Rm3Reranker(englishAnalyzer, FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.twitter.txt"));
        cascade.add(new RemoveRetweetsTemporalTiebreakReranker());
    } else {
        cascade.add(new RemoveRetweetsTemporalTiebreakReranker());
    }
    if (!searchArgs.model.isEmpty() && searchArgs.extractors != null) {
        LOG.debug(String.format("Ranklib model used, modeled loaded from %s", searchArgs.model));
        cascade.add(new RankLibReranker(searchArgs.model, FIELD_BODY, searchArgs.extractors));
    }
    FeatureExtractors extractorChain = null;
    if (searchArgs.extractors != null) {
        extractorChain = FeatureExtractors.loadExtractor(searchArgs.extractors);
    }
    if (searchArgs.dumpFeatures) {
        PrintStream out = new PrintStream(searchArgs.featureFile);
        Qrels qrels = new Qrels(searchArgs.qrels);
        cascade.add(new TweetsLtrDataGenerator(out, qrels, extractorChain));
    }
    MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(searchArgs.topics));
    PrintStream out = new PrintStream(new FileOutputStream(new File(searchArgs.output)));
    LOG.info("Writing output to " + searchArgs.output);
    LOG.info("Initialized complete! (elapsed time = " + (System.currentTimeMillis() - initializationTime) + "ms)");
    long totalTime = 0;
    int cnt = 0;
    for (MicroblogTopic topic : topics) {
        long curQueryTime = System.currentTimeMillis();
        // do not consider tweets with tweet ids that are beyond the queryTweetTime
        // <querytweettime> tag contains the timestamp of the query in terms of the
        // chronologically nearest tweet id within the corpus
        Query filter = TermRangeQuery.newStringRange(FIELD_ID, "0", String.valueOf(topic.getQueryTweetTime()), true, true);
        Query query = AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, englishAnalyzer, topic.getQuery());
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(filter, BooleanClause.Occur.FILTER);
        builder.add(query, BooleanClause.Occur.MUST);
        Query q = builder.build();
        TopDocs rs = searcher.search(q, searchArgs.hits);
        List<String> queryTokens = AnalyzerUtils.tokenize(englishAnalyzer, topic.getQuery());
        RerankerContext context = new RerankerContext(searcher, query, topic.getId(), topic.getQuery(), queryTokens, FIELD_BODY, filter);
        ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
        long queryTime = (System.currentTimeMillis() - curQueryTime);
        for (int i = 0; i < docs.documents.length; i++) {
            String qid = topic.getId().replaceFirst("^MB0*", "");
            out.println(String.format("%s Q0 %s %d %f %s", qid, docs.documents[i].getField(FIELD_ID).stringValue(), (i + 1), docs.scores[i], searchArgs.runtag));
        }
        LOG.info("Query " + topic.getId() + " (elapsed time = " + queryTime + "ms)");
        totalTime += queryTime;
        cnt++;
    }
    LOG.info("All queries completed!");
    LOG.info("Total elapsed time = " + totalTime + "ms");
    LOG.info("Average query latency = " + (totalTime / cnt) + "ms");
    reader.close();
    out.close();
}
Also used : RemoveRetweetsTemporalTiebreakReranker(io.anserini.rerank.twitter.RemoveRetweetsTemporalTiebreakReranker) ScoredDocuments(io.anserini.rerank.ScoredDocuments) RerankerCascade(io.anserini.rerank.RerankerCascade) Rm3Reranker(io.anserini.rerank.rm3.Rm3Reranker) RankLibReranker(io.anserini.rerank.RankLibReranker) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) PrintStream(java.io.PrintStream) Qrels(io.anserini.util.Qrels) CmdLineParser(org.kohsuke.args4j.CmdLineParser) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) FeatureExtractors(io.anserini.ltr.feature.FeatureExtractors) TweetsLtrDataGenerator(io.anserini.ltr.TweetsLtrDataGenerator) FileOutputStream(java.io.FileOutputStream) IndexReader(org.apache.lucene.index.IndexReader) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) LMDirichletSimilarity(org.apache.lucene.search.similarities.LMDirichletSimilarity) File(java.io.File) CmdLineException(org.kohsuke.args4j.CmdLineException) RerankerContext(io.anserini.rerank.RerankerContext)
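
For context, SearchArgs above is an args4j options bean: each command-line flag maps to an annotated field that parseArgument fills in. Below is a minimal sketch of such a bean, limited to field names the example actually reads (index, topics, output, inmem, bm25, ql, hits); the real io.anserini.search.SearchArgs declares more options and may use different names, defaults, and annotations.

import org.kohsuke.args4j.Option;

// Hypothetical sketch of an args4j options bean; Anserini's real SearchArgs may differ.
public class SearchArgs {

    @Option(name = "-index", metaVar = "[path]", required = true, usage = "index path")
    public String index;

    @Option(name = "-topics", metaVar = "[file]", required = true, usage = "topics file")
    public String topics;

    @Option(name = "-output", metaVar = "[file]", required = true, usage = "output run file")
    public String output;

    @Option(name = "-inmem", usage = "load the index into memory (MMapDirectory with preload)")
    public boolean inmem = false;

    @Option(name = "-bm25", usage = "use BM25 scoring")
    public boolean bm25 = false;

    @Option(name = "-ql", usage = "use query-likelihood (Dirichlet) scoring")
    public boolean ql = false;

    @Option(name = "-hits", metaVar = "[number]", usage = "maximum number of hits to return")
    public int hits = 1000;
}

With a bean like this, the example's new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90)) wires the annotations to the parser, and printUsage and printExample generate their text from those same annotations.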

Example 62 with CmdLineParser

use of org.kohsuke.args4j.CmdLineParser in project newts by OpenNMS.

the class ImportRunner method execute.

public void execute(String... args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        // handling of wrong arguments
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }
    // Setup the slf4j metrics reporter
    MetricRegistry metrics = new MetricRegistry();
    final long start = System.currentTimeMillis();
    metrics.register("elapsed-seconds", new Gauge<Double>() {

        @Override
        public Double getValue() {
            return (System.currentTimeMillis() - start) / 1000.0;
        }
    });
    final ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).outputTo(System.err).convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build();
    reporter.start(10, SECONDS);
    if (m_restUrl == null) {
        // we are using a direct importer so use a NewtsReporter for storing metrics
        NewtsReporter newtsReporter = NewtsReporter.forRegistry(metrics).name("importer").convertRatesTo(SECONDS).convertDurationsTo(MILLISECONDS).build(repository());
        newtsReporter.start(1, SECONDS);
    }
    LOG.debug("Scanning {} for GSOD data files...", m_source);
    // walk the files in the directory given
    Observable<Sample> samples = fileTreeWalker(m_source.toPath()).subscribeOn(Schedulers.io()).map(meter(metrics.meter("files"), Path.class)).map(reportFile()).mergeMap(lines()).filter(exclude("YEARMODA")).mergeMap(samples()).map(adjustTime()).map(meter(metrics.meter("samples"), Sample.class));
    Observable<List<Sample>> batches = samples.buffer(m_samplesPerBatch);
    Observable<Boolean> doImport = m_restUrl != null ? restPoster(batches, metrics) : directPoster(batches, metrics);
    System.err.println("doImport = " + doImport);
    // GO!!!
    final AtomicReference<Subscription> subscription = new AtomicReference<>();
    final AtomicBoolean failed = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);
    Subscription s = doImport.subscribe(new Observer<Boolean>() {

        @Override
        public void onCompleted() {
            System.err.println("Finished Importing Everything!");
            reporter.report();
            latch.countDown();
            System.exit(0);
        }

        @Override
        public void onError(Throwable e) {
            failed.set(true);
            System.err.println("Error importing!");
            e.printStackTrace();
            try {
                // latch.await();
                Subscription s = subscription.get();
                if (s != null)
                    s.unsubscribe();
            } catch (Exception ex) {
                System.err.println("Failed to close httpClient!");
                ex.printStackTrace();
            } finally {
            // dumpThreads();
            }
        }

        @Override
        public void onNext(Boolean t) {
            System.err.println("Received a boolen: " + t);
        }
    });
    subscription.set(s);
    if (failed.get()) {
        s.unsubscribe();
    }
    // latch.countDown();
    System.err.println("Return from Subscribe!");
    latch.await();
// dumpThreads();
}
Also used : Path(java.nio.file.Path) CmdLineParser(org.kohsuke.args4j.CmdLineParser) ConsoleReporter(com.codahale.metrics.ConsoleReporter) Sample(org.opennms.newts.api.Sample) MetricRegistry(com.codahale.metrics.MetricRegistry) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) ParseException(java.text.ParseException) CmdLineException(org.kohsuke.args4j.CmdLineException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) List(java.util.List) NewtsReporter(org.opennms.newts.reporter.metrics.NewtsReporter) Subscription(rx.Subscription)
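
In this example the options bean is the runner itself: new CmdLineParser(this) binds annotated fields on ImportRunner, which execute() later reads (m_restUrl, m_samplesPerBatch, m_source). The sketch below shows what those declarations could look like; the option names and defaults are assumptions, and the real newts ImportRunner may define them differently.

import java.io.File;

import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.Option;

// Hypothetical field declarations for the self-parsing runner pattern; newts' real ImportRunner may differ.
public class ImportRunnerOptionsSketch {

    @Option(name = "-url", usage = "REST endpoint to post batches to; omit to import directly into the repository")
    private String m_restUrl = null;

    @Option(name = "-samples-per-batch", usage = "number of samples buffered per batch")
    private int m_samplesPerBatch = 1000;

    @Argument(metaVar = "sourceDir", required = true, usage = "directory tree containing GSOD data files")
    private File m_source;
}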

Example 63 with CmdLineParser

use of org.kohsuke.args4j.CmdLineParser in project ORCID-Source by ORCID.

the class FindOrcidWorkDuplicates method main.

/**
     * @param args
     */
public static void main(String[] args) throws Exception {
    FindOrcidWorkDuplicates findOrcidWorkDuplicates = new FindOrcidWorkDuplicates();
    CmdLineParser parser = new CmdLineParser(findOrcidWorkDuplicates);
    parser.parseArgument(args);
    findOrcidWorkDuplicates.validateArgs(parser);
    findOrcidWorkDuplicates.createOutputFile();
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser)
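
Note that this example calls parseArgument without a try/catch, so a bad argument lets CmdLineException propagate out of main as a stack trace. If the usual usage message is preferred, the same entry point could be wrapped in the standard args4j pattern; this is only a sketch, not how ORCID-Source actually handles it.

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;

public class FindOrcidWorkDuplicatesCli {

    public static void main(String[] args) throws Exception {
        FindOrcidWorkDuplicates tool = new FindOrcidWorkDuplicates();
        CmdLineParser parser = new CmdLineParser(tool);
        try {
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            // Print the parse error plus the generated usage text, then exit non-zero.
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
            System.exit(1);
        }
        tool.validateArgs(parser);
        tool.createOutputFile();
    }
}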

Example 64 with CmdLineParser

use of org.kohsuke.args4j.CmdLineParser in project ORCID-Source by ORCID.

the class CheckAndFixContributorNameVisibility method main.

public static void main(String[] args) throws IOException {
    CheckAndFixContributorNameVisibility fixer = new CheckAndFixContributorNameVisibility();
    CmdLineParser parser = new CmdLineParser(fixer);
    try {
        parser.parseArgument(args);
        fixer.validateArgs(parser);
        fixer.init();
        fixer.execute();
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
    }
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser) CmdLineException(org.kohsuke.args4j.CmdLineException)
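
The validateArgs(parser) call here is project code rather than part of args4j. One common way to implement such a check is to raise a CmdLineException bound to the parser, so the catch block above prints both the message and the usage text. The following is a hypothetical sketch; the real CheckAndFixContributorNameVisibility may validate differently.

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

// Hypothetical cross-field validation; not taken from ORCID-Source.
public class ValidatedArgsSketch {

    @Option(name = "-f", usage = "path of the file to process")
    private String fileName;

    public void validateArgs(CmdLineParser parser) throws CmdLineException {
        if (fileName == null || fileName.isEmpty()) {
            // Tying the exception to the parser lets the caller reuse printUsage for the error report.
            throw new CmdLineException(parser, "-f must be provided", null);
        }
    }
}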

Example 65 with CmdLineParser

use of org.kohsuke.args4j.CmdLineParser in project ORCID-Source by ORCID.

the class AddGrantTypeToExistingClients method main.

public static void main(String[] args) {
    AddGrantTypeToExistingClients addScopesToExistingClients = new AddGrantTypeToExistingClients();
    CmdLineParser parser = new CmdLineParser(addScopesToExistingClients);
    try {
        parser.parseArgument(args);
        addScopesToExistingClients.validateParameters(parser);
        addScopesToExistingClients.init();
        addScopesToExistingClients.process();
        System.out.println();
        System.out.println();
        System.out.println(addScopesToExistingClients.getClientsUpdated() + " clients were updated");
        System.out.println();
        System.out.println();
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        System.exit(1);
    }
    System.exit(0);
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser) CmdLineException(org.kohsuke.args4j.CmdLineException)
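
Since every one of these entry points funnels through the same parseArgument call, the option binding itself is easy to exercise in isolation. Below is a minimal sketch of such a test, reusing the hypothetical SearchArgs bean from Example 61; the field names and defaults there are assumptions, so a real test would target the project's actual bean.

import static org.junit.Assert.assertEquals;

import org.junit.Test;
import org.kohsuke.args4j.CmdLineParser;

public class CmdLineParsingTest {

    @Test
    public void bindsOptionsToAnnotatedFields() throws Exception {
        SearchArgs args = new SearchArgs();
        CmdLineParser parser = new CmdLineParser(args);
        // parseArgument throws CmdLineException on unknown or malformed options, failing the test.
        parser.parseArgument("-index", "lucene-index", "-topics", "topics.txt",
                "-output", "run.txt", "-hits", "100");
        assertEquals("lucene-index", args.index);
        assertEquals(100, args.hits);
    }
}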

Aggregations

CmdLineParser (org.kohsuke.args4j.CmdLineParser): 119
CmdLineException (org.kohsuke.args4j.CmdLineException): 80
File (java.io.File): 16
Test (org.junit.Test): 14
IOException (java.io.IOException): 11
HyracksConnection (org.apache.hyracks.api.client.HyracksConnection): 10
IHyracksClientConnection (org.apache.hyracks.api.client.IHyracksClientConnection): 10
ArrayList (java.util.ArrayList): 9
JobId (org.apache.hyracks.api.job.JobId): 9
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 9
PrintStream (java.io.PrintStream): 6
FileOutputStream (java.io.FileOutputStream): 4
List (java.util.List): 4
FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors): 3
Qrels (io.anserini.util.Qrels): 3
Directory (org.apache.lucene.store.Directory): 3
FSDirectory (org.apache.lucene.store.FSDirectory): 3
ConsoleReporter (com.codahale.metrics.ConsoleReporter): 2
MetricRegistry (com.codahale.metrics.MetricRegistry): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2