Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
The class ExtractRm3Stopwords, method main.
/**
 * Command-line entry point: scans every term of one field of a Lucene index, keeps the
 * {@code topK} terms that rank highest under a (value, key) ordering, and writes the surviving
 * term strings to the output file, one per line.
 *
 * <p>Each {@code Pair} is built from the term text and its document frequency in the field
 * (presumably stored as {@code key} and {@code value} respectively; confirm against
 * {@code Pair}). Terms are written in ascending comparator order, because they are drained from
 * a min-heap.
 *
 * <p>On a command-line parse error the message, usage and an example invocation are printed to
 * {@code System.err} and the method returns without touching the index.
 *
 * @param args command-line arguments, parsed by args4j into {@code Args}
 * @throws IllegalArgumentException if the requested field has no terms in the index
 * @throws Exception if the index cannot be read or the output file cannot be written
 */
public static void main(String[] args) throws Exception {
  Args myArgs = new Args();
  CmdLineParser parser = new CmdLineParser(myArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    System.err.println("Example: ExtractRm3Stopwords" + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  // Orders pairs by value, breaking ties on key, so the head of the min-heap is always the
  // weakest candidate currently retained.
  Comparator<Pair> comp = new Comparator<Pair>() {
    @Override
    public int compare(Pair p1, Pair p2) {
      if (p1.value == p2.value) {
        return p1.key.compareTo(p2.key);
      } else {
        return (p1.value < p2.value) ? -1 : 1;
      }
    }
  };
  PriorityQueue<Pair> queue = new PriorityQueue<Pair>(myArgs.topK, comp);

  // try-with-resources guarantees the reader and directory are released even when the scan fails.
  try (Directory dir = FSDirectory.open(Paths.get(myArgs.index));
       IndexReader reader = DirectoryReader.open(dir)) {
    LOG.info("Starting to iterate through all terms...");

    Terms terms = MultiFields.getFields(reader).terms(myArgs.field);
    if (terms == null) {
      // A missing field yields null rather than an empty Terms; fail with a useful message
      // instead of a bare NullPointerException.
      throw new IllegalArgumentException(
          "Field '" + myArgs.field + "' has no indexed terms in index " + myArgs.index);
    }

    TermsEnum termsEnum = terms.iterator();
    BytesRef text = null;
    int cnt = 0;
    while ((text = termsEnum.next()) != null) {
      String term = text.utf8ToString();
      if (term.length() == 0) {
        continue;
      }

      Pair p = new Pair(term, reader.docFreq(new Term(myArgs.field, term)));
      if (queue.size() < myArgs.topK) {
        queue.add(p);
      } else if (comp.compare(p, queue.peek()) > 0) {
        // The new term outranks the weakest retained one: swap it in.
        queue.poll();
        queue.add(p);
      }

      cnt++;
      if (cnt % 1000000 == 0) {
        LOG.info("At term " + term);
      }
    }
  }

  // Drain the heap; the stream is closed even if writing fails part-way.
  try (PrintStream out = new PrintStream(new FileOutputStream(new File(myArgs.output)))) {
    Pair pair;
    while ((pair = queue.poll()) != null) {
      out.println(pair.key);
    }
  }
  LOG.info("Done!");
}
Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
The class IndexCollection, method main.
/**
 * Command-line entry point for {@code IndexCollection}.
 *
 * <p>Parses {@code args} into an {@code IndexCollection.Args} instance with args4j. If parsing
 * fails, the error message, the usage text and an example invocation listing the required
 * options are printed to {@code System.err} and the method returns. Otherwise an
 * {@code IndexCollection} is constructed from the parsed arguments and run.
 *
 * @param args raw command-line arguments
 * @throws Exception propagated from constructing or running the {@code IndexCollection}
 */
public static void main(String[] args) throws Exception {
  final IndexCollection.Args parsedArgs = new IndexCollection.Args();
  final ParserProperties properties = ParserProperties.defaults().withUsageWidth(90);
  final CmdLineParser cli = new CmdLineParser(parsedArgs, properties);

  try {
    cli.parseArgument(args);
  } catch (CmdLineException ex) {
    System.err.println(ex.getMessage());
    cli.printUsage(System.err);
    final String invocation =
        IndexCollection.class.getSimpleName() + cli.printExample(OptionHandlerFilter.REQUIRED);
    System.err.println("Example: " + invocation);
    return;
  }

  final IndexCollection indexer = new IndexCollection(parsedArgs);
  indexer.run();
}
Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
The class IndexUtils, method main.
/**
 * Command-line entry point for {@code IndexUtils}.
 *
 * <p>Parses {@code argv} into an {@code Args} instance; on a parse error the message and usage
 * text go to {@code System.err} and the method returns. Otherwise an {@code IndexUtils} is
 * created for the given index and every requested operation is executed, in the fixed order
 * below. Several operations may be requested in one invocation.
 *
 * @param argv raw command-line arguments
 * @throws Exception propagated from {@code IndexUtils} construction or any requested operation
 */
public static void main(String[] argv) throws Exception {
  final Args opts = new Args();
  final ParserProperties properties = ParserProperties.defaults().withUsageWidth(90);
  final CmdLineParser cli = new CmdLineParser(opts, properties);

  try {
    cli.parseArgument(argv);
  } catch (CmdLineException ex) {
    System.err.println(ex.getMessage());
    cli.printUsage(System.err);
    return;
  }

  final IndexUtils indexUtils = new IndexUtils(opts.index);

  // Index-wide statistics.
  if (opts.stats) {
    indexUtils.printIndexStats();
  }

  // Counts for a single term.
  if (opts.term != null) {
    indexUtils.printTermCounts(opts.term);
  }

  // Document vector of one document.
  if (opts.docvectorDocid != null) {
    indexUtils.printDocumentVector(opts.docvectorDocid);
  }

  // Raw and transformed document contents.
  if (opts.rawDoc != null) {
    System.out.println(indexUtils.getRawDocument(opts.rawDoc));
  }
  if (opts.transformedDoc != null) {
    System.out.println(indexUtils.getTransformedDocument(opts.transformedDoc));
  }

  // Document printed one sentence per line.
  if (opts.sentDoc != null) {
    for (Sentence sentence : indexUtils.getSentDocument(opts.sentDoc)) {
      System.out.println(sentence);
    }
  }

  // Docid conversions in both directions.
  if (opts.lookupDocid != null) {
    System.out.println(indexUtils.convertDocidToLuceneDocid(opts.lookupDocid));
  }
  // NOTE(review): this guard skips a Lucene docid of 0; whether that is intended depends on the
  // field's default value, which is not visible here - confirm against Args.
  if (opts.lookupLuceneDocid > 0) {
    System.out.println(indexUtils.convertLuceneDocidToDocid(opts.lookupLuceneDocid));
  }
}
Use of org.kohsuke.args4j.CmdLineException in project Anserini by castorini.
The class IndexObjectTriples, method main.
/**
 * Command-line entry point for {@code IndexObjectTriples}.
 *
 * <p>Parses {@code args} into an {@code Args} instance with args4j. If parsing fails, the error
 * message, the usage text and an example command listing the required options are printed to
 * {@code System.err} and the method returns. Otherwise an {@code IndexObjectTriples} is
 * constructed from the parsed arguments and run.
 *
 * @param args raw command-line arguments
 * @throws Exception propagated from constructing or running the {@code IndexObjectTriples}
 */
public static void main(String[] args) throws Exception {
  final Args parsedArgs = new Args();
  final ParserProperties properties = ParserProperties.defaults().withUsageWidth(90);
  final CmdLineParser cli = new CmdLineParser(parsedArgs, properties);

  try {
    cli.parseArgument(args);
  } catch (CmdLineException ex) {
    System.err.println(ex.getMessage());
    cli.printUsage(System.err);
    final String invocation =
        IndexObjectTriples.class.getSimpleName() + cli.printExample(OptionHandlerFilter.REQUIRED);
    System.err.println("Example command: " + invocation);
    return;
  }

  final IndexObjectTriples indexer = new IndexObjectTriples(parsedArgs);
  indexer.run();
}
Use of org.kohsuke.args4j.CmdLineException in project fess by codelibs.
The class Crawler, method main.
/**
 * Command-line entry point for the crawler process.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Parse {@code args} into {@code Options}; on a parse error print the message, a synopsis
 *       line and the usage text to {@code System.err} and return.</li>
 *   <li>When debug logging is enabled, dump JVM input arguments, system properties, environment
 *       variables and the parsed options. Any failure during this dump is ignored.</li>
 *   <li>Copy the transport-addresses and cluster-name system properties to the keys read from
 *       {@code EsClient}, when they are non-blank.</li>
 *   <li>Initialise the container, register a shutdown hook that destroys it, and call
 *       {@code process(options)} to obtain the exit code. Any throwable yields
 *       {@code Constants.EXIT_FAIL}. The container is destroyed in a {@code finally} block.</li>
 *   <li>Call {@code System.exit} only when the exit code differs from
 *       {@code Constants.EXIT_OK}.</li>
 * </ol>
 *
 * @param args raw command-line arguments
 */
public static void main(final String[] args) {
    final Options crawlerOptions = new Options();
    final CmdLineParser cli = new CmdLineParser(crawlerOptions);
    try {
        cli.parseArgument(args);
    } catch (final CmdLineException parseFailure) {
        System.err.println(parseFailure.getMessage());
        System.err.println("java " + Crawler.class.getCanonicalName() + " [options...] arguments...");
        cli.printUsage(System.err);
        return;
    }

    if (logger.isDebugEnabled()) {
        try {
            ManagementFactory.getRuntimeMXBean().getInputArguments().stream()
                    .forEach(jvmArg -> logger.debug("Parameter: " + jvmArg));
            System.getProperties().entrySet().stream()
                    .forEach(prop -> logger.debug("Property: " + prop.getKey() + "=" + prop.getValue()));
            System.getenv().entrySet()
                    .forEach(env -> logger.debug("Env: " + env.getKey() + "=" + env.getValue()));
            logger.debug("Option: " + crawlerOptions);
        } catch (final Exception ignored) {
            // Diagnostic dump is best-effort only; a failure here must not stop the crawler.
        }
    }

    // Forward the externally supplied settings to the keys defined on EsClient.
    final String esTransportAddresses = System.getProperty(Constants.FESS_ES_TRANSPORT_ADDRESSES);
    if (StringUtil.isNotBlank(esTransportAddresses)) {
        System.setProperty(EsClient.TRANSPORT_ADDRESSES, esTransportAddresses);
    }
    final String esClusterName = System.getProperty(Constants.FESS_ES_CLUSTER_NAME);
    if (StringUtil.isNotBlank(esClusterName)) {
        System.setProperty(EsClient.CLUSTER_NAME, esClusterName);
    }

    int exitCode;
    try {
        running.set(true);

        SingletonLaContainerFactory.setConfigPath("app.xml");
        SingletonLaContainerFactory.setExternalContext(new GenericExternalContext());
        SingletonLaContainerFactory.setExternalContextComponentDefRegister(new GenericExternalContextComponentDefRegister());
        SingletonLaContainerFactory.init();

        // Make sure the container is also torn down when the JVM is shut down.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> destroyContainer(), "ShutdownHook"));

        exitCode = process(crawlerOptions);
    } catch (final ContainerNotAvailableException unavailable) {
        if (logger.isDebugEnabled()) {
            logger.debug("Crawler is stopped.", unavailable);
        } else if (logger.isInfoEnabled()) {
            logger.info("Crawler is stopped.");
        }
        exitCode = Constants.EXIT_FAIL;
    } catch (final Throwable failure) {
        logger.error("Crawler does not work correctly.", failure);
        exitCode = Constants.EXIT_FAIL;
    } finally {
        destroyContainer();
    }

    if (exitCode == Constants.EXIT_OK) {
        return;
    }
    System.exit(exitCode);
}
Aggregations