Use of org.apache.lucene.benchmark.quality.utils.SubmissionReport in project lucene-solr by apache.
Example 1: class QueryDriver, method main.
public static void main(String[] args) throws Exception {
  if (args.length < 4 || args.length > 5) {
    System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
    System.err.println("topicsFile: input file containing queries");
    System.err.println("qrelsFile: input file containing relevance judgements");
    System.err.println("submissionFile: output submission file for trec_eval");
    System.err.println("indexDir: index directory");
    System.err.println("querySpec: string composed of the fields to use in the query: T=title, D=description, N=narrative");
    System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
    System.exit(1);
  }
  Path topicsFile = Paths.get(args[0]);
  Path qrelsFile = Paths.get(args[1]);
  Path submissionFile = Paths.get(args[2]);
  SubmissionReport submitLog = new SubmissionReport(
      new PrintWriter(Files.newBufferedWriter(submissionFile, StandardCharsets.UTF_8)), "lucene");
  FSDirectory dir = FSDirectory.open(Paths.get(args[3]));
  // default to Title-only if not specified
  String fieldSpec = args.length == 5 ? args[4] : "T";
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  int maxResults = 1000;
  String docNameField = "docname";
  PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);
  // use TREC utilities to read TREC topics into quality queries
  TrecTopicsReader qReader = new TrecTopicsReader();
  QualityQuery[] qqs = qReader.readQueries(Files.newBufferedReader(topicsFile, StandardCharsets.UTF_8));
  // prepare the judge, with TREC utilities that read from a QRels file
  Judge judge = new TrecJudge(Files.newBufferedReader(qrelsFile, StandardCharsets.UTF_8));
  // validate that topics & judgments match each other
  judge.validateData(qqs, logger);
  Set<String> fieldSet = new HashSet<>();
  if (fieldSpec.indexOf('T') >= 0) fieldSet.add("title");
  if (fieldSpec.indexOf('D') >= 0) fieldSet.add("description");
  if (fieldSpec.indexOf('N') >= 0) fieldSet.add("narrative");
  // set up the parsing of quality queries into Lucene queries
  QualityQueryParser qqParser = new SimpleQQParser(fieldSet.toArray(new String[0]), "body");
  // run the benchmark
  QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
  qrun.setMaxResults(maxResults);
  QualityStats[] stats = qrun.execute(judge, submitLog, logger);
  // print an average of the results
  QualityStats avg = QualityStats.average(stats);
  avg.log("SUMMARY", 2, logger, " ");
  reader.close();
  dir.close();
}
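A quick way to exercise this driver is to call main directly with the same arguments the usage message describes. A minimal sketch, where all four paths and the field spec are placeholders:

  // equivalent to running QueryDriver from the command line; arguments are illustrative
  QueryDriver.main(new String[] {
      "trec.topics",     // topicsFile: TREC topics to read
      "trec.qrels",      // qrelsFile: relevance judgements
      "submission.txt",  // submissionFile: trec_eval output written via SubmissionReport
      "/path/to/index",  // indexDir: an existing Lucene index
      "TD"               // querySpec: query on Title + Description
  });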
Example 2: class TestQualityRun, method testTrecQuality.
public void testTrecQuality() throws Exception {
  // first create the partial Reuters index
  createReutersIndex();
  int maxResults = 1000;
  // the original doc ID is kept in the line-doc format's title field
  String docNameField = "doctitle";
  PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true) : null;
  // prepare topics
  InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
  TrecTopicsReader qReader = new TrecTopicsReader();
  QualityQuery[] qqs = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, StandardCharsets.UTF_8)));
  // prepare judge
  InputStream qrels = getClass().getResourceAsStream("trecQRels.txt");
  Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, StandardCharsets.UTF_8)));
  // validate topics & judgments match each other
  judge.validateData(qqs, logger);
  Directory dir = newFSDirectory(getWorkDir().resolve("index"));
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  QualityQueryParser qqParser = new SimpleQQParser("title", "body");
  QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
  SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
  qrun.setMaxResults(maxResults);
  QualityStats[] stats = qrun.execute(judge, submitLog, logger);
  // verify by the way judgments were altered for this test, depending on m = queryNum % 8:
  // m==0: avg-p and recall are hurt (fake docs marked relevant); precision_at_n stays perfect
  // m==1: avg-p and precision_at_n are hurt (relevant docs unmarked); recall stays perfect
  // m==2: avg-p, precision_at_n and recall are all hurt
  // m>=3: these queries remain perfect
  for (int i = 0; i < stats.length; i++) {
    QualityStats s = stats[i];
    switch (i % 8) {
      case 0:
        assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
        assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
        }
        break;
      case 1:
        assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
        assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
        }
        break;
      case 2:
        assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
        assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
        }
        break;
      default:
        assertEquals("avg-p should be perfect: " + s.getAvp(), 1.0, s.getAvp(), 1E-2);
        assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
        }
    }
  }
  QualityStats avg = QualityStats.average(stats);
  if (logger != null) {
    avg.log("Average statistics:", 1, logger, " ");
  }
assertTrue("mean avg-p should be hurt: " + avg.getAvp(), 1.0 > avg.getAvp());
assertTrue("avg recall should be hurt: " + avg.getRecall(), 1.0 > avg.getRecall());
for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
assertTrue("avg p_at_" + j + " should be hurt: " + avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
}
reader.close();
dir.close();
}
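Both examples hand the SubmissionReport to QualityBenchmark.execute, which reports the hits of each executed query. When TopDocs are already in hand, the report can also be driven directly. A minimal sketch, assuming the imports used above plus java.util.Collections; the index path, query, field names, and run tag are placeholders:

  // search once, then log the hits in trec_eval submission format
  try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter out = new PrintWriter(
        Files.newBufferedWriter(Paths.get("submission.txt"), StandardCharsets.UTF_8));
    SubmissionReport submitLog = new SubmissionReport(out, "myRun");  // "myRun" tags each output line
    // a single quality query; normally these come from TrecTopicsReader
    QualityQuery qq = new QualityQuery("401", Collections.singletonMap("title", "foreign minorities"));
    TopDocs td = searcher.search(new TermQuery(new Term("body", "minorities")), 1000);
    submitLog.report(qq, td, "docname", searcher);  // one submission line per hit
    submitLog.flush();
    out.close();
  }

flush() forwards to the underlying PrintWriter, so call it before closing the writer to make sure all submission lines reach the file.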