Search in sources :

Example 1 with TrecTopicsReader

use of org.apache.lucene.benchmark.quality.trec.TrecTopicsReader in project lucene-solr by apache.

the class TestQualityRun method testTrecTopicsReader.

/**
 * Checks that {@code TrecTopicsReader} parses the {@code trecTopics.txt} test resource into
 * 20 {@code QualityQuery} entries, and spot-checks the "title", "description" and "narrative"
 * values of the first, second and last entries.
 *
 * @throws Exception if the topics resource cannot be read or parsed
 */
public void testTrecTopicsReader() throws Exception {
    // prepare topics
    InputStream topicsFile = getClass().getResourceAsStream("trecTopics.txt");
    // getResourceAsStream returns null for a missing resource; fail with a readable message
    // instead of an anonymous NullPointerException from the InputStreamReader constructor.
    assertTrue("test resource trecTopics.txt was not found", topicsFile != null);
    QualityQuery[] qqs;
    // try-with-resources guarantees the stream is released even if parsing throws.
    try (BufferedReader topicsReader = new BufferedReader(new InputStreamReader(topicsFile, StandardCharsets.UTF_8))) {
        TrecTopicsReader qReader = new TrecTopicsReader();
        qqs = qReader.readQueries(topicsReader);
    }
    assertEquals(20, qqs.length);

    // first topic
    QualityQuery qq = qqs[0];
    assertEquals("statement months  total 1987", qq.getValue("title"));
    assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2", qq.getValue("description"));
    assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2", qq.getValue("narrative"));

    // second topic
    qq = qqs[1];
    assertEquals("agreed 15  against five", qq.getValue("title"));
    assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2", qq.getValue("description"));
    assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2", qq.getValue("narrative"));

    // last topic
    qq = qqs[19];
    assertEquals("20 while  common week", qq.getValue("title"));
    assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2", qq.getValue("description"));
    assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2", qq.getValue("narrative"));
}
Also used : TrecTopicsReader(org.apache.lucene.benchmark.quality.trec.TrecTopicsReader) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader)

Example 2 with TrecTopicsReader

use of org.apache.lucene.benchmark.quality.trec.TrecTopicsReader in project lucene-solr by apache.

the class TestQualityRun method testTrecQuality.

/**
 * Runs a {@code QualityBenchmark} over the partial Reuters index using the
 * {@code trecTopics.txt} queries and {@code trecQRels.txt} judgments, then checks the
 * per-query and averaged {@code QualityStats}.
 *
 * <p>Per-query expectations are keyed on {@code i % 8}: remainders 0, 1 and 2 are expected to
 * show degraded average precision (and, depending on the case, degraded recall or
 * precision-at-n), while all other queries are expected to be perfect within a tolerance of
 * {@code 1E-2}. NOTE(review): presumably the qrels fixture was altered for those queries to
 * produce these outcomes; confirm against the fixture files.
 *
 * @throws Exception if index creation, resource loading or the benchmark run fails
 */
public void testTrecQuality() throws Exception {
    // first create the partial reuters index
    createReutersIndex();
    int maxResults = 1000;
    // orig docID is in the linedoc format title
    String docNameField = "doctitle";
    // The logger wraps System.out, so it is intentionally never closed here.
    PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true) : null;

    // prepare topics
    InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
    // Fail with a readable message rather than a bare NullPointerException when the resource is missing.
    assertTrue("test resource trecTopics.txt was not found", topics != null);
    QualityQuery[] qqs;
    try (BufferedReader topicsReader = new BufferedReader(new InputStreamReader(topics, StandardCharsets.UTF_8))) {
        TrecTopicsReader qReader = new TrecTopicsReader();
        qqs = qReader.readQueries(topicsReader);
    }

    // prepare judge
    InputStream qrels = getClass().getResourceAsStream("trecQRels.txt");
    assertTrue("test resource trecQRels.txt was not found", qrels != null);
    Judge judge;
    try (BufferedReader qrelsReader = new BufferedReader(new InputStreamReader(qrels, StandardCharsets.UTF_8))) {
        judge = new TrecJudge(qrelsReader);
    }

    // validate topics & judgments match each other
    judge.validateData(qqs, logger);

    // Resources close in reverse declaration order (reader first, then dir), matching the
    // original explicit close() calls, and are now released even when an assertion fails.
    try (Directory dir = newFSDirectory(getWorkDir().resolve("index"));
         IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QualityQueryParser qqParser = new SimpleQQParser("title", "body");
        QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
        SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
        qrun.setMaxResults(maxResults);
        QualityStats[] stats = qrun.execute(judge, submitLog, logger);

        for (int i = 0; i < stats.length; i++) {
            QualityStats s = stats[i];
            switch (i % 8) {
                case 0:
                    // avg-p and recall hurt, precision-at-n still perfect
                    assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
                    assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
                    }
                    break;
                case 1:
                    // avg-p and precision-at-n hurt, recall still perfect
                    assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
                    assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
                    }
                    break;
                case 2:
                    // everything hurt
                    assertTrue("avg-p should be hurt: " + s.getAvp(), 1.0 > s.getAvp());
                    assertTrue("recall should be hurt: " + s.getRecall(), 1.0 > s.getRecall());
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertTrue("p_at_" + j + " should be hurt: " + s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
                    }
                    break;
                default:
                    // remainders 3..7: these queries remain perfect
                    assertEquals("avg-p should be perfect: " + s.getAvp(), 1.0, s.getAvp(), 1E-2);
                    assertEquals("recall should be perfect: " + s.getRecall(), 1.0, s.getRecall(), 1E-2);
                    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
                        assertEquals("p_at_" + j + " should be perfect: " + s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
                    }
                    break;
            }
        }

        // The averages include the degraded queries, so none of them can be perfect.
        QualityStats avg = QualityStats.average(stats);
        if (logger != null) {
            avg.log("Average statistis:", 1, logger, "  ");
        }
        assertTrue("mean avg-p should be hurt: " + avg.getAvp(), 1.0 > avg.getAvp());
        assertTrue("avg recall should be hurt: " + avg.getRecall(), 1.0 > avg.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
            assertTrue("avg p_at_" + j + " should be hurt: " + avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) SimpleQQParser(org.apache.lucene.benchmark.quality.utils.SimpleQQParser) SubmissionReport(org.apache.lucene.benchmark.quality.utils.SubmissionReport) TrecJudge(org.apache.lucene.benchmark.quality.trec.TrecJudge) TrecTopicsReader(org.apache.lucene.benchmark.quality.trec.TrecTopicsReader) BufferedReader(java.io.BufferedReader) IndexReader(org.apache.lucene.index.IndexReader) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

BufferedReader (java.io.BufferedReader)2 InputStream (java.io.InputStream)2 InputStreamReader (java.io.InputStreamReader)2 TrecTopicsReader (org.apache.lucene.benchmark.quality.trec.TrecTopicsReader)2 OutputStreamWriter (java.io.OutputStreamWriter)1 PrintWriter (java.io.PrintWriter)1 TrecJudge (org.apache.lucene.benchmark.quality.trec.TrecJudge)1 SimpleQQParser (org.apache.lucene.benchmark.quality.utils.SimpleQQParser)1 SubmissionReport (org.apache.lucene.benchmark.quality.utils.SubmissionReport)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 Directory (org.apache.lucene.store.Directory)1