Search in sources:

Example 66 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestSpanNotQuery method testNoPositions.

/**
 * Indexes one document whose field "foo" is a {@code StringField}, then runs a
 * {@code SpanNotQuery} against that field and expects an {@code IllegalStateException}
 * whose message mentions the missing position data.
 */
public void testNoPositions() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // A single document with one un-stored StringField.
    Document document = new Document();
    document.add(new StringField("foo", "bar", Field.Store.NO));
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(reader);
    SpanTermQuery include = new SpanTermQuery(new Term("foo", "bar"));
    SpanTermQuery exclude = new SpanTermQuery(new Term("foo", "baz"));

    // The span query is built inside the lambda so that construction and search are both covered.
    IllegalStateException thrown = expectThrows(IllegalStateException.class,
        () -> searcher.search(new SpanNotQuery(include, exclude), 5));
    String message = thrown.getMessage();
    assertTrue(message.contains("was indexed without position data"));

    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 67 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TokenSourcesTest method testMaxStartOffsetConsistency.

/**
 * For every {@code maxStartOffset} from -1 up to the text length, requests a token stream for
 * the term-vector field ("fld_tv") and for the field without term vectors ("fld_notv") through
 * {@code TokenSources.getTokenStream} and asserts that both produce tokens with the same start
 * offsets, none of which exceeds a non-negative {@code maxStartOffset}.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testMaxStartOffsetConsistency() throws IOException {
    FieldType tvFieldType = new FieldType(TextField.TYPE_NOT_STORED);
    tvFieldType.setStoreTermVectors(true);
    tvFieldType.setStoreTermVectorOffsets(true);
    tvFieldType.setStoreTermVectorPositions(true);
    // try-with-resources: the directory is closed even when an assertion below fails
    try (Directory dir = newDirectory()) {
        MockAnalyzer analyzer = new MockAnalyzer(random());
        //we don't necessarily consume the whole stream because of limiting by startOffset
        analyzer.setEnableChecks(false);
        Document doc = new Document();
        final String TEXT = " f gg h";
        doc.add(new Field("fld_tv", analyzer.tokenStream("fooFld", TEXT), tvFieldType));
        doc.add(new TextField("fld_notv", analyzer.tokenStream("barFld", TEXT)));
        IndexReader reader;
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            writer.addDocument(doc);
            reader = writer.getReader();
        }
        try {
            Fields tvFields = reader.getTermVectors(0);
            // -1 is included in the range; the upper-bound check below is skipped for negative values
            for (int maxStartOffset = -1; maxStartOffset <= TEXT.length(); maxStartOffset++) {
                TokenStream tvStream = TokenSources.getTokenStream("fld_tv", tvFields, TEXT, analyzer, maxStartOffset);
                TokenStream anaStream = TokenSources.getTokenStream("fld_notv", tvFields, TEXT, analyzer, maxStartOffset);
                //assert have same tokens, none of which has a start offset > maxStartOffset
                final OffsetAttribute tvOffAtt = tvStream.addAttribute(OffsetAttribute.class);
                final OffsetAttribute anaOffAtt = anaStream.addAttribute(OffsetAttribute.class);
                tvStream.reset();
                anaStream.reset();
                while (tvStream.incrementToken()) {
                    assertTrue(anaStream.incrementToken());
                    assertEquals(tvOffAtt.startOffset(), anaOffAtt.startOffset());
                    if (maxStartOffset >= 0) {
                        assertTrue(tvOffAtt.startOffset() <= maxStartOffset);
                    }
                }
                // both streams must be exhausted at the same point
                assertFalse(anaStream.incrementToken());
                tvStream.end();
                anaStream.end();
                tvStream.close();
                anaStream.close();
            }
        } finally {
            reader.close();
        }
    }
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Fields(org.apache.lucene.index.Fields) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 68 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TokenSourcesTest method testPayloads.

// LUCENE-5294
/**
 * Indexes four canned tokens into a field that stores term vectors with offsets, positions and
 * payloads, then rebuilds a token stream from the term vectors and asserts that term text,
 * position increment, payload and offsets of every token match what was indexed.
 *
 * @throws Exception if indexing, reading or consuming the token stream fails
 */
public void testPayloads() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
    myFieldType.setStoreTermVectors(true);
    myFieldType.setStoreTermVectorOffsets(true);
    myFieldType.setStoreTermVectorPositions(true);
    myFieldType.setStoreTermVectorPayloads(true);
    // reset the shared offset counter before building tokens
    // NOTE(review): presumably getToken() advances curOffset; its definition is outside this view
    curOffset = 0;
    Token[] tokens = new Token[] { getToken("foxes"), getToken("can"), getToken("jump"), getToken("high") };
    Document doc = new Document();
    doc.add(new Field("field", new CannedTokenStream(tokens), myFieldType));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();
    assertEquals(1, reader.numDocs());
    // try-with-resources closes the stream; a null resource is tolerated and caught by assertNotNull
    try (TokenStream ts = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1)) {
        // fail with a clear message instead of a NullPointerException when no stream is produced
        assertNotNull("no term vector token stream for \"field\"", ts);
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
        PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        for (Token token : tokens) {
            assertTrue(ts.incrementToken());
            assertEquals(token.toString(), termAtt.toString());
            assertEquals(token.getPositionIncrement(), posIncAtt.getPositionIncrement());
            assertEquals(token.getPayload(), payloadAtt.getPayload());
            assertEquals(token.startOffset(), offsetAtt.startOffset());
            assertEquals(token.endOffset(), offsetAtt.endOffset());
        }
        assertFalse(ts.incrementToken());
        // complete the consumer workflow: reset -> incrementToken* -> end -> close
        ts.end();
    }
    reader.close();
    dir.close();
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) IndexReader(org.apache.lucene.index.IndexReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 69 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testBasics.

//
//  Tests below were ported from the PostingsHighlighter. Possibly augmented.  Far below are newer tests.
//
/**
 * Indexes two documents into the "body" field, searches for the term "highlighting" in index
 * order, and checks that the highlighter returns one snippet per hit with the matching term
 * wrapped in {@code <b>} tags.
 */
public void testBasics() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

    // One Field/Document pair is reused; only the field's string value changes per document.
    Field bodyField = new Field("body", "", fieldType);
    Document document = new Document();
    document.add(bodyField);
    String[] texts = {
        "This is a test. Just a test highlighting from postings. Feel free to ignore.",
        "Highlighting the first term. Hope it works."
    };
    for (String text : texts) {
        bodyField.setStringValue(text);
        writer.addDocument(document);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query termQuery = new TermQuery(new Term("body", "highlighting"));
    TopDocs hits = searcher.search(termQuery, 10, Sort.INDEXORDER);
    assertEquals(2, hits.totalHits);

    String[] highlighted = highlighter.highlight("body", termQuery, hits);
    assertEquals(2, highlighted.length);
    assertEquals("Just a test <b>highlighting</b> from postings. ", highlighted[0]);
    assertEquals("<b>Highlighting</b> the first term. ", highlighted[1]);

    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 70 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testMultipleTerms.

/**
 * Indexes two documents into the "body" field, searches with a boolean query of three optional
 * terms ("highlighting", "just", "first") in index order, and checks that every matching term
 * in each returned snippet is wrapped in {@code <b>} tags.
 */
public void testMultipleTerms() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

    // One Field/Document pair is reused; only the field's string value changes per document.
    Field bodyField = new Field("body", "", fieldType);
    Document document = new Document();
    document.add(bodyField);
    String[] texts = {
        "This is a test. Just a test highlighting from postings. Feel free to ignore.",
        "Highlighting the first term. Hope it works."
    };
    for (String text : texts) {
        bodyField.setStringValue(text);
        writer.addDocument(document);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    // Three SHOULD clauses, added in the same order as before.
    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
    clauses.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
    clauses.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = clauses.build();

    TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, hits.totalHits);

    String[] highlighted = highlighter.highlight("body", query, hits);
    assertEquals(2, highlighted.length);
    assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", highlighted[0]);
    assertEquals("<b>Highlighting</b> the <b>first</b> term. ", highlighted[1]);

    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 779; Document (org.apache.lucene.document.Document): 679; Directory (org.apache.lucene.store.Directory): 588; IndexReader (org.apache.lucene.index.IndexReader): 510; Term (org.apache.lucene.index.Term): 325; IndexSearcher (org.apache.lucene.search.IndexSearcher): 294; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 220; BytesRef (org.apache.lucene.util.BytesRef): 142; Field (org.apache.lucene.document.Field): 141; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 136; TopDocs (org.apache.lucene.search.TopDocs): 134; TermQuery (org.apache.lucene.search.TermQuery): 121; DirectoryReader (org.apache.lucene.index.DirectoryReader): 120; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 110; ArrayList (java.util.ArrayList): 95; StringField (org.apache.lucene.document.StringField): 93; Analyzer (org.apache.lucene.analysis.Analyzer): 88; BooleanQuery (org.apache.lucene.search.BooleanQuery): 88; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 77; Test (org.junit.Test): 75