Search in sources :

Example 96 with CmdLineException

use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

the class ExtractRm3Stopwords method main.

/**
 * Command-line entry point: scans every term of one field of a Lucene index and writes the
 * {@code topK} highest-ranked terms (ranked by the value passed to {@code Pair}, which is the
 * term's document frequency) to the output file, one term per line.
 *
 * <p>Terms are written in ascending comparator order, i.e. the lowest-ranked of the retained
 * terms comes first, because the queue is drained from its head.
 *
 * @param args command-line arguments, parsed by args4j into {@code Args}; on a parse error the
 *     usage text is printed to stderr and the method returns without doing any work
 * @throws IllegalArgumentException if the requested field has no terms in the index
 * @throws Exception if the index cannot be opened or read, or the output file cannot be written
 */
public static void main(String[] args) throws Exception {
    Args myArgs = new Args();
    CmdLineParser parser = new CmdLineParser(myArgs, ParserProperties.defaults().withUsageWidth(90));
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        System.err.println("Example: ExtractRm3Stopwords" + parser.printExample(OptionHandlerFilter.REQUIRED));
        return;
    }
    // Ascending by value, ties broken by key. With this ordering the head of the priority queue
    // is always the weakest retained candidate, which is the one to evict.
    // NOTE(review): the == comparison assumes Pair.value is a primitive - confirm.
    Comparator<Pair> comp = new Comparator<Pair>() {

        @Override
        public int compare(Pair p1, Pair p2) {
            if (p1.value == p2.value) {
                return p1.key.compareTo(p2.key);
            } else {
                return (p1.value < p2.value) ? -1 : 1;
            }
        }
    };
    // PriorityQueue itself rejects an initial capacity below 1 with IllegalArgumentException.
    PriorityQueue<Pair> queue = new PriorityQueue<Pair>(myArgs.topK, comp);
    // try-with-resources guarantees the reader and directory are released even when the scan fails.
    try (Directory dir = FSDirectory.open(Paths.get(myArgs.index));
            IndexReader reader = DirectoryReader.open(dir)) {
        LOG.info("Starting to iterate through all terms...");
        Terms terms = MultiFields.getFields(reader).terms(myArgs.field);
        if (terms == null) {
            // terms(field) yields null for a field that is not in the index; fail with a clear message
            // instead of a NullPointerException on the next line.
            throw new IllegalArgumentException("Field '" + myArgs.field + "' has no terms in index '" + myArgs.index + "'");
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef text = null;
        int cnt = 0;
        while ((text = termsEnum.next()) != null) {
            String term = text.utf8ToString();
            if (term.length() == 0) {
                continue;
            }
            Pair p = new Pair(term, reader.docFreq(new Term(myArgs.field, term)));
            if (queue.size() < myArgs.topK) {
                queue.add(p);
            } else if (comp.compare(p, queue.peek()) > 0) {
                // The queue is full: replace the current weakest entry only if the new one outranks it.
                queue.poll();
                queue.add(p);
            }
            cnt++;
            if (cnt % 1000000 == 0) {
                LOG.info("At term " + term);
            }
        }
    }
    // The output stream is closed (and therefore flushed) even if writing fails part-way.
    try (PrintStream out = new PrintStream(new FileOutputStream(new File(myArgs.output)))) {
        Pair pair;
        while ((pair = queue.poll()) != null) {
            out.println(pair.key);
        }
    }
    LOG.info("Done!");
}
Also used : PrintStream(java.io.PrintStream) CmdLineParser(org.kohsuke.args4j.CmdLineParser) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) PriorityQueue(java.util.PriorityQueue) Comparator(java.util.Comparator) TermsEnum(org.apache.lucene.index.TermsEnum) FileOutputStream(java.io.FileOutputStream) IndexReader(org.apache.lucene.index.IndexReader) File(java.io.File) CmdLineException(org.kohsuke.args4j.CmdLineException) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 97 with CmdLineException

use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

the class IndexCollection method main.

/**
 * Command-line entry point for {@code IndexCollection}.
 *
 * <p>Parses the arguments into an {@code IndexCollection.Args} instance and runs the indexer.
 * If parsing fails, the error message, the usage text and an example invocation are printed to
 * stderr and the method returns without running anything.
 *
 * @param args raw command-line arguments
 * @throws Exception whatever {@code IndexCollection#run()} or the constructor propagates
 */
public static void main(String[] args) throws Exception {
    final IndexCollection.Args cliArgs = new IndexCollection.Args();
    final ParserProperties parserProps = ParserProperties.defaults().withUsageWidth(90);
    final CmdLineParser parser = new CmdLineParser(cliArgs, parserProps);

    try {
        parser.parseArgument(args);
    } catch (CmdLineException badArgs) {
        System.err.println(badArgs.getMessage());
        parser.printUsage(System.err);
        final String exampleLine = "Example: " + IndexCollection.class.getSimpleName()
                + parser.printExample(OptionHandlerFilter.REQUIRED);
        System.err.println(exampleLine);
        return;
    }

    final IndexCollection indexer = new IndexCollection(cliArgs);
    indexer.run();
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser) CmdLineException(org.kohsuke.args4j.CmdLineException)

Example 98 with CmdLineException

use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

the class IndexUtils method main.

/**
 * Command-line entry point for {@code IndexUtils}.
 *
 * <p>Parses the arguments, opens the utility on the given index and then performs every
 * requested action in a fixed order: index stats, term counts, document vector, raw document,
 * transformed document, sentence-split document, docid-to-Lucene-docid lookup and
 * Lucene-docid-to-docid lookup. Several actions may be requested in one invocation.
 *
 * <p>On a parse error the message and usage text go to stderr and nothing else happens.
 *
 * @param argv raw command-line arguments
 * @throws Exception whatever the individual {@code IndexUtils} operations propagate
 */
public static void main(String[] argv) throws Exception {
    final Args opts = new Args();
    final ParserProperties parserProps = ParserProperties.defaults().withUsageWidth(90);
    final CmdLineParser parser = new CmdLineParser(opts, parserProps);
    try {
        parser.parseArgument(argv);
    } catch (CmdLineException badArgs) {
        System.err.println(badArgs.getMessage());
        parser.printUsage(System.err);
        return;
    }

    final IndexUtils indexUtils = new IndexUtils(opts.index);

    // Actions that print their own output.
    if (opts.stats) {
        indexUtils.printIndexStats();
    }
    if (opts.term != null) {
        indexUtils.printTermCounts(opts.term);
    }
    if (opts.docvectorDocid != null) {
        indexUtils.printDocumentVector(opts.docvectorDocid);
    }

    // Actions whose result is printed here.
    if (opts.rawDoc != null) {
        System.out.println(indexUtils.getRawDocument(opts.rawDoc));
    }
    if (opts.transformedDoc != null) {
        System.out.println(indexUtils.getTransformedDocument(opts.transformedDoc));
    }
    if (opts.sentDoc != null) {
        for (Sentence sentence : indexUtils.getSentDocument(opts.sentDoc)) {
            System.out.println(sentence);
        }
    }
    if (opts.lookupDocid != null) {
        System.out.println(indexUtils.convertDocidToLuceneDocid(opts.lookupDocid));
    }
    // NOTE(review): a value of 0 is skipped here; presumably 0 is the "unset" default, but that
    // also makes Lucene docid 0 impossible to look up - confirm against the Args declaration.
    if (opts.lookupLuceneDocid > 0) {
        System.out.println(indexUtils.convertLuceneDocidToDocid(opts.lookupLuceneDocid));
    }
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser) Sentence(edu.stanford.nlp.simple.Sentence) CmdLineException(org.kohsuke.args4j.CmdLineException)

Example 99 with CmdLineException

use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.

the class IndexObjectTriples method main.

/**
 * Command-line entry point for {@code IndexObjectTriples}.
 *
 * <p>Binds the arguments to an {@code Args} instance and, when they parse cleanly, constructs
 * the indexer and runs it. When they do not, the parser's error message, the usage text and an
 * example command are written to stderr and the method returns.
 *
 * @param args raw command-line arguments
 * @throws Exception whatever the {@code IndexObjectTriples} constructor or {@code run()} propagates
 */
public static void main(String[] args) throws Exception {
    final Args parsedArgs = new Args();
    final CmdLineParser parser =
            new CmdLineParser(parsedArgs, ParserProperties.defaults().withUsageWidth(90));

    try {
        parser.parseArgument(args);
    } catch (CmdLineException parseFailure) {
        System.err.println(parseFailure.getMessage());
        parser.printUsage(System.err);
        final String className = IndexObjectTriples.class.getSimpleName();
        System.err.println("Example command: " + className + parser.printExample(OptionHandlerFilter.REQUIRED));
        return;
    }

    final IndexObjectTriples indexer = new IndexObjectTriples(parsedArgs);
    indexer.run();
}
Also used : CmdLineParser(org.kohsuke.args4j.CmdLineParser) CmdLineException(org.kohsuke.args4j.CmdLineException)

Example 100 with CmdLineException

use of org.kohsuke.args4j.CmdLineException in project fess by codelibs.

the class Crawler method main.

/**
 * Command-line entry point for the crawler process.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Parse the arguments into {@code Options}; on failure print the error, a synopsis line
 *       and the usage text to stderr and return.</li>
 *   <li>When debug logging is enabled, dump JVM input arguments, system properties, environment
 *       variables and the parsed options. Failures while dumping are deliberately ignored.</li>
 *   <li>Copy the transport-address and cluster-name system properties to the keys
 *       {@code EsClient} uses, when they are not blank.</li>
 *   <li>Initialise the DI container, register a shutdown hook that destroys it, and run
 *       {@code process(options)}.</li>
 *   <li>Always destroy the container afterwards and call {@code System.exit} with the exit code
 *       unless it equals {@code Constants.EXIT_OK}.</li>
 * </ol>
 *
 * @param args raw command-line arguments
 */
public static void main(final String[] args) {
    final Options options = new Options();
    final CmdLineParser parser = new CmdLineParser(options);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException parseFailure) {
        System.err.println(parseFailure.getMessage());
        System.err.println("java " + Crawler.class.getCanonicalName() + " [options...] arguments...");
        parser.printUsage(System.err);
        return;
    }

    if (logger.isDebugEnabled()) {
        try {
            for (final String jvmArg : ManagementFactory.getRuntimeMXBean().getInputArguments()) {
                logger.debug("Parameter: " + jvmArg);
            }
            System.getProperties().entrySet().stream()
                    .forEach(prop -> logger.debug("Property: " + prop.getKey() + "=" + prop.getValue()));
            System.getenv().entrySet()
                    .forEach(env -> logger.debug("Env: " + env.getKey() + "=" + env.getValue()));
            logger.debug("Option: " + options);
        } catch (final Exception ignored) {
            // Diagnostic output only: a failure here must not stop the crawler from starting.
        }
    }

    // Forward the Fess-level Elasticsearch settings to the property names EsClient reads.
    final String transportAddresses = System.getProperty(Constants.FESS_ES_TRANSPORT_ADDRESSES);
    if (StringUtil.isNotBlank(transportAddresses)) {
        System.setProperty(EsClient.TRANSPORT_ADDRESSES, transportAddresses);
    }
    final String clusterName = System.getProperty(Constants.FESS_ES_CLUSTER_NAME);
    if (StringUtil.isNotBlank(clusterName)) {
        System.setProperty(EsClient.CLUSTER_NAME, clusterName);
    }

    int exitCode;
    try {
        running.set(true);
        SingletonLaContainerFactory.setConfigPath("app.xml");
        SingletonLaContainerFactory.setExternalContext(new GenericExternalContext());
        SingletonLaContainerFactory.setExternalContextComponentDefRegister(new GenericExternalContextComponentDefRegister());
        SingletonLaContainerFactory.init();

        // Also destroy the container when the JVM is shut down from outside.
        final Thread shutdownCallback = new Thread(() -> destroyContainer(), "ShutdownHook");
        Runtime.getRuntime().addShutdownHook(shutdownCallback);

        exitCode = process(options);
    } catch (final ContainerNotAvailableException stopped) {
        if (logger.isDebugEnabled()) {
            logger.debug("Crawler is stopped.", stopped);
        } else if (logger.isInfoEnabled()) {
            logger.info("Crawler is stopped.");
        }
        exitCode = Constants.EXIT_FAIL;
    } catch (final Throwable failure) {
        logger.error("Crawler does not work correctly.", failure);
        exitCode = Constants.EXIT_FAIL;
    } finally {
        destroyContainer();
    }

    if (exitCode == Constants.EXIT_OK) {
        return;
    }
    System.exit(exitCode);
}
Also used : ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) CmdLineParser(org.kohsuke.args4j.CmdLineParser) GenericExternalContext(org.lastaflute.di.core.external.GenericExternalContext) CmdLineException(org.kohsuke.args4j.CmdLineException) IOException(java.io.IOException) GenericExternalContextComponentDefRegister(org.lastaflute.di.core.external.GenericExternalContextComponentDefRegister)

Aggregations

CmdLineException (org.kohsuke.args4j.CmdLineException)100 CmdLineParser (org.kohsuke.args4j.CmdLineParser)75 IOException (java.io.IOException)16 File (java.io.File)14 ArrayList (java.util.ArrayList)11 PrintStream (java.io.PrintStream)7 StringWriter (java.io.StringWriter)6 List (java.util.List)5 FileOutputStream (java.io.FileOutputStream)4 Path (java.nio.file.Path)4 CmdLineParser (com.google.gerrit.util.cli.CmdLineParser)3 FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors)3 Qrels (io.anserini.util.Qrels)3 Directory (org.apache.lucene.store.Directory)3 FSDirectory (org.apache.lucene.store.FSDirectory)3 ConsoleReporter (com.codahale.metrics.ConsoleReporter)2 MetricRegistry (com.codahale.metrics.MetricRegistry)2 Project (com.google.gerrit.reviewdb.client.Project)2 OrmException (com.google.gwtorm.server.OrmException)2 Hudson (hudson.model.Hudson)2