Use of io.anserini.rerank.IdentityReranker in project Anserini by castorini.
In class PyseriniEntryPoint, method search.
/**
 * Convenience overload that runs one free-text query with fixed settings and
 * hands the work to the multi-topic {@code search} overload.
 *
 * <p>The settings are hard-wired for now: BM25 with k1 = 0.9 and b = 0.4, a
 * cascade holding only an {@link IdentityReranker}, and {@code false} for both
 * trailing boolean arguments of the delegate.
 *
 * @param query   the query text; registered as the only topic, under id 1
 * @param numHits forwarded unchanged to the delegate overload
 * @return whatever the delegate overload returns for the single topic
 * @throws IOException    propagated from the delegate overload
 * @throws ParseException propagated from the delegate overload
 */
public Map<String, Float> search(String query, int numHits) throws IOException, ParseException {
  // The delegate works on a topics map, so wrap the lone query under the fixed id 1.
  SortedMap<Integer, String> singleTopic = new TreeMap<>();
  singleTopic.put(1, query);

  // Only the identity reranker for now - no branching on args.rm3 yet.
  RerankerCascade rerankers = new RerankerCascade();
  rerankers.add(new IdentityReranker());

  // Only BM25 for now - no branching on args.bm25 or args.ql yet.
  Similarity bm25 = new BM25Similarity(0.9f, 0.4f);

  return search(singleTopic, bm25, numHits, rerankers, false, false);
}
Use of io.anserini.rerank.IdentityReranker in project Anserini by castorini.
In class SearchWebCollection, method main.
/**
 * Command-line entry point: parses the arguments into a {@code SearchArgs},
 * configures the scoring model and reranker cascade, reads the topics file
 * and runs every topic against the index.
 *
 * <p>Flow:
 * <ol>
 *   <li>On a command-line parse error, print the message, usage and an example
 *       to stderr and return.</li>
 *   <li>Pick the similarity: {@code LMDirichletSimilarity} when {@code ql} is
 *       set, otherwise {@code BM25Similarity} when {@code bm25} is set;
 *       with neither set, log an error and exit with status -1.</li>
 *   <li>Build the cascade: {@code Rm3Reranker} when {@code rm3} is set (which
 *       also turns on {@code useQueryParser}), otherwise
 *       {@code IdentityReranker}; when {@code dumpFeatures} is set, append a
 *       {@code WebCollectionLtrDataGenerator} writing to the feature file.</li>
 *   <li>Check the topics file exists, is a regular file and is readable, then
 *       instantiate the configured {@code TopicReader} reflectively.</li>
 *   <li>Search, close the searcher and log the elapsed time.</li>
 * </ol>
 *
 * <p>The opened {@code Directory}, the feature-dump {@code PrintStream} and the
 * searcher are released even when a later step throws.
 *
 * @param args command-line arguments, bound onto {@code SearchArgs}
 * @throws Exception if the topics file is unusable, or if index access,
 *         reflection-based reader creation or searching fails
 */
public static void main(String[] args) throws Exception {
  SearchArgs searchArgs = new SearchArgs();
  CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(args);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    System.err.println("Example: SearchWebCollection" + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info("Reading index at " + searchArgs.index);
  // NOTE(review): this Directory is never handed to the searcher, which is built
  // from searchArgs.index below. It is kept so the existing behaviour and log
  // output stay the same, but it is now closed on exit instead of leaking.
  Directory dir;
  if (searchArgs.inmem) {
    LOG.info("Using MMapDirectory with preload");
    dir = new MMapDirectory(Paths.get(searchArgs.index));
    ((MMapDirectory) dir).setPreload(true);
  } else {
    LOG.info("Using default FSDirectory");
    dir = FSDirectory.open(Paths.get(searchArgs.index));
  }

  // Declared outside the try so the finally block can close it if it was opened.
  PrintStream featureOut = null;
  try {
    Similarity similarity = null;
    if (searchArgs.ql) {
      LOG.info("Using QL scoring model");
      similarity = new LMDirichletSimilarity(searchArgs.mu);
    } else if (searchArgs.bm25) {
      LOG.info("Using BM25 scoring model");
      similarity = new BM25Similarity(searchArgs.k1, searchArgs.b);
    } else {
      LOG.error("Error: Must specify scoring model!");
      System.exit(-1);
    }

    RerankerCascade cascade = new RerankerCascade();
    boolean useQueryParser = false;
    if (searchArgs.rm3) {
      cascade.add(new Rm3Reranker(new EnglishAnalyzer(), FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.gov2.txt"));
      useQueryParser = true;
    } else {
      cascade.add(new IdentityReranker());
    }

    FeatureExtractors extractors = null;
    if (searchArgs.extractors != null) {
      extractors = FeatureExtractors.loadExtractor(searchArgs.extractors);
    }

    if (searchArgs.dumpFeatures) {
      featureOut = new PrintStream(searchArgs.featureFile);
      Qrels qrels = new Qrels(searchArgs.qrels);
      cascade.add(new WebCollectionLtrDataGenerator(featureOut, qrels, extractors));
    }

    Path topicsFile = Paths.get(searchArgs.topics);
    if (!Files.exists(topicsFile) || !Files.isRegularFile(topicsFile) || !Files.isReadable(topicsFile)) {
      throw new IllegalArgumentException("Topics file : " + topicsFile + " does not exist or is not a (readable) file.");
    }

    // The reader class is chosen by name: io.anserini.search.query.<topicReader>TopicReader,
    // which must expose a constructor taking a single Path.
    TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + searchArgs.topicReader + "TopicReader").getConstructor(Path.class).newInstance(topicsFile);
    SortedMap<Integer, String> topics = tr.read();

    final long start = System.nanoTime();
    SearchWebCollection searcher = new SearchWebCollection(searchArgs.index);
    try {
      searcher.search(topics, searchArgs.output, similarity, searchArgs.hits, cascade, useQueryParser, searchArgs.keepstop);
    } finally {
      // Release the searcher even when search() throws.
      searcher.close();
    }
    // As before, the reported time covers constructing, running and closing the searcher.
    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
    LOG.info("Total " + topics.size() + " topics searched in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
  } finally {
    if (featureOut != null) {
      featureOut.close();
    }
    dir.close();
  }
}
Use of io.anserini.rerank.IdentityReranker in project Anserini by castorini.
In class SearchTimeUtil, method main.
/**
 * Times repeated searches over a fixed list of web-track topic files.
 *
 * <p>Each topic file is read with the {@code WebxmlTopicReader} (instantiated
 * reflectively) and searched three times, using BM25 (k1 = 0.9, b = 0.4),
 * 1000 hits and a cascade holding only an {@code IdentityReranker}. Each run
 * writes its results to a fresh temporary file, and its elapsed time is printed
 * to stdout. The searcher is closed even if reading or searching fails.
 *
 * @param args exactly one element: the index directory; otherwise a usage
 *             message is printed to stderr and the JVM exits with status 1
 * @throws IOException               if a temp file cannot be created or index/topic I/O fails
 * @throws ParseException            propagated from the search call
 * @throws ClassNotFoundException    if the topic reader class cannot be found
 * @throws NoSuchMethodException     if the topic reader has no {@code Path} constructor
 * @throws InvocationTargetException if the topic reader constructor throws
 * @throws IllegalAccessException    if the topic reader constructor is not accessible
 * @throws InstantiationException    if the topic reader class cannot be instantiated
 */
public static void main(String[] args) throws IOException, ParseException, ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, InstantiationException {
  if (args.length != 1) {
    System.err.println("Usage: SearchTimeUtil <indexDir>");
    System.err.println("indexDir: index directory");
    System.exit(1);
  }

  String[] topics = { "topics.web.1-50.txt", "topics.web.51-100.txt", "topics.web.101-150.txt", "topics.web.151-200.txt", "topics.web.201-250.txt", "topics.web.251-300.txt" };

  SearchWebCollection searcher = new SearchWebCollection(args[0]);
  try {
    for (String topicFile : topics) {
      // NOTE(review): other code in this project refers to "src/main/resources/..." -
      // confirm that "src/resources/topics-and-qrels/" is the intended location.
      Path topicsFile = Paths.get("src/resources/topics-and-qrels/", topicFile);
      TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + "Webxml" + "TopicReader").getConstructor(Path.class).newInstance(topicsFile);
      SortedMap<Integer, String> queries = tr.read();

      // Three timed runs per topic file.
      for (int i = 1; i <= 3; i++) {
        final long start = System.nanoTime();
        // NOTE(review): these temporary submission files are never deleted -
        // confirm whether they should be cleaned up after each run.
        String submissionFile = File.createTempFile(topicFile + "_" + i, ".tmp").getAbsolutePath();
        RerankerCascade cascade = new RerankerCascade();
        cascade.add(new IdentityReranker());
        searcher.search(queries, submissionFile, new BM25Similarity(0.9f, 0.4f), 1000, cascade);
        final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
        System.out.println(topicFile + "_" + i + " search completed in " + DurationFormatUtils.formatDuration(durationMillis, "mm:ss:SSS"));
      }
    }
  } finally {
    // Previously skipped whenever reading topics or searching threw.
    searcher.close();
  }
}
Aggregations