Example 66 with TopDocs

Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

From the class TestNRTCachingDirectory, method testNRTAndCommit:

public void testNRTAndCommit() throws Exception {
    Directory dir = newDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
    final LineFileDocs docs = new LineFileDocs(random());
    final int numDocs = TestUtil.nextInt(random(), 100, 400);
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    final List<BytesRef> ids = new ArrayList<>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++) {
        final Document doc = docs.nextDoc();
        ids.add(new BytesRef(doc.get("docid")));
        w.addDocument(doc);
        if (random().nextInt(20) == 17) {
            if (r == null) {
                r = DirectoryReader.open(w.w);
            } else {
                final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
                if (r2 != null) {
                    r.close();
                    r = r2;
                }
            }
            assertEquals(1 + docCount, r.numDocs());
            final IndexSearcher s = newSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
            // System.out.println("tot hits " + hits.totalHits);
        }
    }
    if (r != null) {
        r.close();
    }
    // Close should force cache to clear since all files are sync'd
    w.close();
    final String[] cachedFiles = cachedDir.listCachedFiles();
    for (String file : cachedFiles) {
        System.out.println("FAIL: cached file " + file + " remains after sync");
    }
    assertEquals(0, cachedFiles.length);
    r = DirectoryReader.open(dir);
    for (BytesRef id : ids) {
        assertEquals(1, r.docFreq(new Term("docid", id)));
    }
    r.close();
    cachedDir.close();
    docs.close();
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), TermQuery (org.apache.lucene.search.TermQuery), DirectoryReader (org.apache.lucene.index.DirectoryReader), ArrayList (java.util.ArrayList), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), TopDocs (org.apache.lucene.search.TopDocs), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), BytesRef (org.apache.lucene.util.BytesRef), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig), LineFileDocs (org.apache.lucene.util.LineFileDocs)
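
The heart of this test is the near-real-time (NRT) reader loop: documents are added through the writer, the reader is reopened only when the index has actually changed, and a search against the fresh reader yields a TopDocs. Below is a minimal, hedged sketch of that loop on its own, outside the test harness; the field name, document text, and use of StandardAnalyzer are illustrative assumptions rather than part of the test, and the usual Lucene imports (including org.apache.lucene.analysis.standard.StandardAnalyzer) are assumed.

// Minimal NRT sketch (illustrative names; not part of the test above).
// A Directory is supplied by the caller.
static void nrtSketch(Directory dir) throws IOException {
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    // An NRT reader sees documents that have been added but not yet committed
    DirectoryReader reader = DirectoryReader.open(writer);
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        doc.add(new TextField("body", "the quick brown fox " + i, Field.Store.NO));
        writer.addDocument(doc);
        // Reopen only if something changed; openIfChanged returns null otherwise
        DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(new TermQuery(new Term("body", "fox")), 10);
        // hits reflects only what this reader can see; no assertion on totalHits here
    }
    reader.close();
    writer.close();
}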

Example 67 with TopDocs

Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

From the class TestSimilarityBase, method testHeartRanking:

/** Test whether all similarities return document 3 before documents 7 and 8. */
public void testHeartRanking() throws IOException {
    Query q = new TermQuery(new Term(FIELD_BODY, "heart"));
    for (SimilarityBase sim : sims) {
        searcher.setSimilarity(sim);
        TopDocs topDocs = searcher.search(q, 1000);
        assertEquals("Failed: " + sim.toString(), "2", reader.document(topDocs.scoreDocs[0].doc).get(FIELD_ID));
    }
}
Also used: TopDocs (org.apache.lucene.search.TopDocs), TermQuery (org.apache.lucene.search.TermQuery), Query (org.apache.lucene.search.Query), Term (org.apache.lucene.index.Term)
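
Since the assertion only inspects the top hit, a natural extension when a similarity fails is to dump the whole ranking from the TopDocs. A small, hedged diagnostic sketch, reusing the test's searcher, reader, FIELD_BODY and FIELD_ID, and assuming org.apache.lucene.search.ScoreDoc is imported:

// Sketch: print the full ranking for one similarity (diagnostic only).
Query q = new TermQuery(new Term(FIELD_BODY, "heart"));
TopDocs topDocs = searcher.search(q, 1000);
for (ScoreDoc sd : topDocs.scoreDocs) {
    // FIELD_ID is the stored document id used by the assertion above
    System.out.println("doc id=" + reader.document(sd.doc).get(FIELD_ID) + " score=" + sd.score);
}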

Example 68 with TopDocs

Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

From the class TokenSourcesTest, method testOverlapWithOffset:

public void testOverlapWithOffset() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        // no positions!
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    try {
        final DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(new SpanTermQuery(new Term(FIELD, "{fox}")), new SpanTermQuery(new Term(FIELD, "fox"))), 1);
        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        // new SpanTermQuery(new Term(FIELD, "{fox}")),
        // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), FieldType (org.apache.lucene.document.FieldType), TopDocs (org.apache.lucene.search.TopDocs), Field (org.apache.lucene.document.Field), TextField (org.apache.lucene.document.TextField), IndexWriter (org.apache.lucene.index.IndexWriter), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), IndexReader (org.apache.lucene.index.IndexReader), Directory (org.apache.lucene.store.Directory)
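
Stripped of the test scaffolding, the flow is: store term vectors with offsets at index time, run the query to get a TopDocs, rebuild a TokenStream from the stored term vectors of the top hit, and hand it to the Highlighter. A condensed, hedged sketch of just that flow; the field name, text, and the searcher and reader variables are placeholders, and the snippet is assumed to sit in a method that declares IOException and InvalidTokenOffsetsException:

// Index-time: the field must carry term vectors with offsets for TokenSources to work.
FieldType withVectors = new FieldType(TextField.TYPE_NOT_STORED);
withVectors.setStoreTermVectors(true);
withVectors.setStoreTermVectorOffsets(true);

// Search-time: highlight the top hit from its stored term vectors.
Query query = new TermQuery(new Term("body", "fox"));
TopDocs hits = searcher.search(query, 1);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(
        "body", reader.getTermVectors(hits.scoreDocs[0].doc), -1);
String fragment = highlighter.getBestFragment(tokenStream, "the fox did not jump");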

Example 69 with TopDocs

Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

From the class SynonymTokenizer, method testCustomScoreQueryHighlight:

public void testCustomScoreQueryHighlight() throws Exception {
    TermQuery termQuery = new TermQuery(new Term(FIELD_NAME, "very"));
    CustomScoreQuery query = new CustomScoreQuery(termQuery);
    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);
    final int docId0 = hits.scoreDocs[0].doc;
    Document doc = searcher.doc(docId0);
    String storedField = doc.get(FIELD_NAME);
    TokenStream stream = getAnyTokenStream(FIELD_NAME, docId0);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    String fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals("Hello this is a piece of text that is <B>very</B> long and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
}
Also used: MultiTermQuery (org.apache.lucene.search.MultiTermQuery), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), TermQuery (org.apache.lucene.search.TermQuery), CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), IntPoint (org.apache.lucene.document.IntPoint), TopDocs (org.apache.lucene.search.TopDocs), CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery), Sort (org.apache.lucene.search.Sort)
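
The twist here relative to the other highlighter examples is the three-argument search call: passing a Sort makes the order of hits.scoreDocs deterministic (document order first, then score) before the top hit is highlighted. A short, hedged sketch of reading such a sorted result, reusing the test's searcher and FIELD_NAME and assuming SortField and ScoreDoc are imported:

// Sketch: sorted search returning TopDocs, then reading back stored fields.
Sort sort = new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE);
TopDocs hits = searcher.search(new TermQuery(new Term(FIELD_NAME, "very")), 10, sort);
for (ScoreDoc sd : hits.scoreDocs) {
    Document doc = searcher.doc(sd.doc);
    System.out.println(sd.doc + ": " + doc.get(FIELD_NAME));
}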

Example 70 with TopDocs

Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

From the class TokenSourcesTest, method testOverlapWithOffsetExactPhrase:

public void testOverlapWithOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        // no positions!
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), TokenStream (org.apache.lucene.analysis.TokenStream), Query (org.apache.lucene.search.Query), SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery), SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery), DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), FieldType (org.apache.lucene.document.FieldType), TopDocs (org.apache.lucene.search.TopDocs), Field (org.apache.lucene.document.Field), TextField (org.apache.lucene.document.TextField), IndexWriter (org.apache.lucene.index.IndexWriter), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), IndexReader (org.apache.lucene.index.IndexReader), Directory (org.apache.lucene.store.Directory)

Aggregations

TopDocs (org.apache.lucene.search.TopDocs): 496 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 302 usages
Document (org.apache.lucene.document.Document): 275 usages
TermQuery (org.apache.lucene.search.TermQuery): 189 usages
IndexReader (org.apache.lucene.index.IndexReader): 187 usages
Term (org.apache.lucene.index.Term): 174 usages
Directory (org.apache.lucene.store.Directory): 174 usages
Query (org.apache.lucene.search.Query): 172 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 144 usages
BooleanQuery (org.apache.lucene.search.BooleanQuery): 127 usages
ScoreDoc (org.apache.lucene.search.ScoreDoc): 127 usages
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 120 usages
Sort (org.apache.lucene.search.Sort): 94 usages
Field (org.apache.lucene.document.Field): 85 usages
SortField (org.apache.lucene.search.SortField): 74 usages
IOException (java.io.IOException): 58 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 56 usages
TextField (org.apache.lucene.document.TextField): 47 usages
PhraseQuery (org.apache.lucene.search.PhraseQuery): 46 usages
PrefixQuery (org.apache.lucene.search.PrefixQuery): 45 usages
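
The classes that co-occur most often with TopDocs above all belong to one basic pattern: write documents with an IndexWriter, open an IndexReader and IndexSearcher over a Directory, run a Query, and walk the resulting ScoreDoc array. A minimal, hedged, self-contained sketch of that pattern follows; the field name and text are placeholders, and RAMDirectory is used only to keep the sketch in memory (it is deprecated in newer Lucene releases).

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TopDocsSketch {
    public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory();   // in-memory index, illustration only
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "the quick brown fox", Field.Store.YES));
            writer.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("body", "fox")), 10);
            for (ScoreDoc sd : hits.scoreDocs) {
                // sd.doc is the internal document id; fetch the stored field to display it
                System.out.println(sd.doc + " -> " + searcher.doc(sd.doc).get("body"));
            }
        }
    }
}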