Search in sources :

Example 1 with SubmissionReport

use of org.apache.lucene.benchmark.quality.utils.SubmissionReport in project lucene-solr by apache.

the class QueryDriver method main.

/**
 * Command-line entry point: runs a quality benchmark over an existing index using TREC-format
 * topics and relevance judgements.
 *
 * <p>Arguments, in order: the topics file, the qrels file, the submission output file, the index
 * directory and, optionally, a query spec made of the letters {@code T}, {@code D} and {@code N}
 * (title, description, narrative) selecting which topic parts go into each query. When the query
 * spec is omitted it defaults to {@code "T"}. With a wrong number of arguments a usage message is
 * printed to standard error and the JVM exits with status 1.
 *
 * <p>The averaged statistics are logged under the label {@code SUMMARY} to a logger backed by
 * {@code System.out}. The submission writer, the index reader and the directory are closed on
 * every exit path, including failures.
 *
 * @param args {@code <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]}
 * @throws Exception if a file or the index cannot be opened or read, or the benchmark run fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 4 || args.length > 5) {
        System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
        System.err.println("topicsFile: input file containing queries");
        System.err.println("qrelsFile: input file containing relevance judgements");
        System.err.println("submissionFile: output submission file for trec_eval");
        System.err.println("indexDir: index directory");
        System.err.println("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
        System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
        System.exit(1);
    }
    Path topicsFile = Paths.get(args[0]);
    Path qrelsFile = Paths.get(args[1]);
    Path submissionFile = Paths.get(args[2]);
    // default to Title-only if not specified.
    String fieldSpec = args.length == 5 ? args[4] : "T";
    int maxResults = 1000;
    String docNameField = "docname";

    // Resources are released in reverse order of declaration: index reader, directory, and
    // finally the submission writer (closing it also flushes any buffered report lines).
    try (PrintWriter submissionWriter = new PrintWriter(Files.newBufferedWriter(submissionFile, StandardCharsets.UTF_8));
            FSDirectory dir = FSDirectory.open(Paths.get(args[3]));
            IndexReader reader = DirectoryReader.open(dir)) {
        SubmissionReport submitLog = new SubmissionReport(submissionWriter, "lucene");
        IndexSearcher searcher = new IndexSearcher(reader);
        // Wraps System.out with auto-flush enabled; deliberately not closed so that standard
        // output stays usable after this method returns.
        PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);

        // use trec utilities to read trec topics into quality queries
        // NOTE(review): the readers below are closed here whether or not the TREC utilities
        // close them as well; closing a reader twice is harmless.
        QualityQuery[] qqs;
        try (java.io.BufferedReader topics = Files.newBufferedReader(topicsFile, StandardCharsets.UTF_8)) {
            qqs = new TrecTopicsReader().readQueries(topics);
        }
        // prepare judge, with trec utilities that read from a QRels file
        Judge judge;
        try (java.io.BufferedReader qrels = Files.newBufferedReader(qrelsFile, StandardCharsets.UTF_8)) {
            judge = new TrecJudge(qrels);
        }
        // validate topics & judgments match each other
        judge.validateData(qqs, logger);

        // Translate the query spec letters into the topic parts used to build each query.
        Set<String> fieldSet = new HashSet<>();
        if (fieldSpec.indexOf('T') >= 0) {
            fieldSet.add("title");
        }
        if (fieldSpec.indexOf('D') >= 0) {
            fieldSet.add("description");
        }
        if (fieldSpec.indexOf('N') >= 0) {
            fieldSet.add("narrative");
        }
        // set the parsing of quality queries into Lucene queries.
        QualityQueryParser qqParser = new SimpleQQParser(fieldSet.toArray(new String[0]), "body");

        // run the benchmark
        QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
        qrun.setMaxResults(maxResults);
        QualityStats[] stats = qrun.execute(judge, submitLog, logger);

        // print an average of the per-query results
        QualityStats avg = QualityStats.average(stats);
        avg.log("SUMMARY", 2, logger, "  ");
    }
}
Also used : Path(java.nio.file.Path) IndexSearcher(org.apache.lucene.search.IndexSearcher) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleQQParser(org.apache.lucene.benchmark.quality.utils.SimpleQQParser) SubmissionReport(org.apache.lucene.benchmark.quality.utils.SubmissionReport) IndexReader(org.apache.lucene.index.IndexReader) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet)

Example 2 with SubmissionReport

use of org.apache.lucene.benchmark.quality.utils.SubmissionReport in project lucene-solr by apache.

the class TestQualityRun method testTrecQuality.

/**
 * Runs the quality benchmark over the partial Reuters index using the {@code trecTopics.txt} and
 * {@code trecQRels.txt} test resources, then checks the per-query and averaged statistics.
 *
 * <p>Expectations depend on the query's position modulo 8: positions 0, 1 and 2 must each show
 * a specific kind of degradation (see the branches below), every other position must be perfect
 * within a tolerance of {@code 1E-2}. The averaged statistics must all be below 1.0.
 *
 * <p>The directory and the index reader are closed even when an assertion fails, so a failing
 * check is not accompanied by an additional leak from this method.
 *
 * @throws Exception if the index or the test resources cannot be read, or the benchmark fails
 */
public void testTrecQuality() throws Exception {
    // first create the partial reuters index
    createReutersIndex();
    int maxResults = 1000;
    // orig docID is in the linedoc format title
    String docNameField = "doctitle";
    // Logging (and the submission report further down) is only enabled in verbose mode.
    PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true) : null;

    // prepare topics
    InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
    // Fail with a clear message instead of an NPE when the resource is missing.
    assertTrue("missing test resource: trecTopics.txt", topics != null);
    QualityQuery[] qqs;
    try (BufferedReader topicsReader = new BufferedReader(new InputStreamReader(topics, StandardCharsets.UTF_8))) {
        qqs = new TrecTopicsReader().readQueries(topicsReader);
    }

    // prepare judge
    InputStream qrels = getClass().getResourceAsStream("trecQRels.txt");
    assertTrue("missing test resource: trecQRels.txt", qrels != null);
    Judge judge;
    try (BufferedReader qrelsReader = new BufferedReader(new InputStreamReader(qrels, StandardCharsets.UTF_8))) {
        judge = new TrecJudge(qrelsReader);
    }
    // validate topics & judgments match each other
    judge.validateData(qqs, logger);

    // Closed in reverse order of declaration: the reader first, then the directory.
    try (Directory dir = newFSDirectory(getWorkDir().resolve("index"));
            IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QualityQueryParser qqParser = new SimpleQQParser("title", "body");
        QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
        SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
        qrun.setMaxResults(maxResults);
        QualityStats[] stats = qrun.execute(judge, submitLog, logger);

        for (int i = 0; i < stats.length; i++) {
            QualityStats s = stats[i];
            switch (i % 8) {
                case 0:
                    // average precision and recall degraded, precision-at-j still perfect
                    assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
                    assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
                    }
                    break;
                case 1:
                    // average precision and precision-at-j degraded, recall still perfect
                    assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
                    assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
                    }
                    break;
                case 2:
                    // everything degraded
                    assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
                    assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
                    }
                    break;
                default:
                    // i % 8 >= 3: these queries remain perfect
                    assertEquals("avg-p should be perfect: " + s.getAvp(), 1.0, s.getAvp(), 1E-2);
                    assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
                    }
                    break;
            }
        }

        // The degraded queries must pull every averaged figure below 1.0.
        QualityStats avg = QualityStats.average(stats);
        if (logger != null) {
            avg.log("Average statistis:", 1, logger, "  ");
        }
        assertTrue("mean avg-p should be hurt: " + avg.getAvp(), 1.0 > avg.getAvp());
        assertTrue("avg recall should be hurt: " + avg.getRecall(), 1.0 > avg.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
            assertTrue("avg p_at_" + j + " should be hurt: " + avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) SimpleQQParser(org.apache.lucene.benchmark.quality.utils.SimpleQQParser) SubmissionReport(org.apache.lucene.benchmark.quality.utils.SubmissionReport) TrecJudge(org.apache.lucene.benchmark.quality.trec.TrecJudge) TrecTopicsReader(org.apache.lucene.benchmark.quality.trec.TrecTopicsReader) BufferedReader(java.io.BufferedReader) IndexReader(org.apache.lucene.index.IndexReader) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

OutputStreamWriter (java.io.OutputStreamWriter)2 PrintWriter (java.io.PrintWriter)2 SimpleQQParser (org.apache.lucene.benchmark.quality.utils.SimpleQQParser)2 SubmissionReport (org.apache.lucene.benchmark.quality.utils.SubmissionReport)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 BufferedReader (java.io.BufferedReader)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 Path (java.nio.file.Path)1 HashSet (java.util.HashSet)1 TrecJudge (org.apache.lucene.benchmark.quality.trec.TrecJudge)1 TrecTopicsReader (org.apache.lucene.benchmark.quality.trec.TrecTopicsReader)1 Directory (org.apache.lucene.store.Directory)1 FSDirectory (org.apache.lucene.store.FSDirectory)1