Search in sources :

Example 16 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TestSnowball method testFilterTokens.

/**
 * Pulls a single token from an English {@code SnowballFilter} wrapped around a
 * {@code TestTokenStream} and verifies the term text together with the offset, type,
 * position-increment, flags and payload attributes exposed by the filter.
 */
public void testFilterTokens() throws Exception {
    final SnowballFilter stemmer = new SnowballFilter(new TestTokenStream(), "English");

    // Resolve every attribute up front, before the token is advanced.
    final CharTermAttribute term = stemmer.getAttribute(CharTermAttribute.class);
    final OffsetAttribute offsets = stemmer.getAttribute(OffsetAttribute.class);
    final TypeAttribute type = stemmer.getAttribute(TypeAttribute.class);
    final PayloadAttribute payload = stemmer.getAttribute(PayloadAttribute.class);
    final PositionIncrementAttribute increment = stemmer.getAttribute(PositionIncrementAttribute.class);
    final FlagsAttribute flags = stemmer.getAttribute(FlagsAttribute.class);

    stemmer.incrementToken();

    assertEquals("accent", term.toString());
    assertEquals(2, offsets.startOffset());
    assertEquals(7, offsets.endOffset());
    assertEquals("wrd", type.type());
    assertEquals(3, increment.getPositionIncrement());
    assertEquals(77, flags.getFlags());

    final BytesRef expectedPayload = new BytesRef(new byte[] { 0, 1, 2, 3 });
    assertEquals(expectedPayload, payload.getPayload());
}
Also used : PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) FlagsAttribute(org.apache.lucene.analysis.tokenattributes.FlagsAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) BytesRef(org.apache.lucene.util.BytesRef) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 17 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class ShingleAnalyzerWrapperTest method testShingleAnalyzerWrapperPhraseQuery.

/*
   * Demonstrates how a phrase query made of shingles can be assembled
   * from the tokens an analyzer produces.
   */
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
    final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    try (TokenStream shingles = analyzer.tokenStream("content", "this sentence")) {
        final PositionIncrementAttribute increment = shingles.addAttribute(PositionIncrementAttribute.class);
        final CharTermAttribute text = shingles.addAttribute(CharTermAttribute.class);
        shingles.reset();
        // The counter starts at -1 so that an increment of 1 on the first token yields position 0.
        for (int pos = -1; shingles.incrementToken(); ) {
            pos += increment.getPositionIncrement();
            phrase.add(new Term("content", text.toString()), pos);
        }
        shingles.end();
    }
    final ScoreDoc[] hits = searcher.search(phrase.build(), 1000).scoreDocs;
    compareRanks(hits, new int[] { 0 });
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 18 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class QueryBuilder method analyzeMultiPhrase.

/**
   * Creates a complex (multi-term) phrase query from the cached tokenstream contents.
   * <p>
   * Tokens reporting a position increment of 0 are grouped with the preceding token;
   * each group is added to the query as one set of alternative terms. When
   * {@code enablePositionIncrements} is set, every group is added at the position
   * accumulated from the increments; otherwise groups are added without an explicit position.
   *
   * @param field  field name used for every generated {@code Term}
   * @param stream token stream to consume; it is reset here before being read
   * @param slop   slop applied to the resulting query
   * @return the built multi-phrase query
   * @throws IOException if reading from the token stream fails
   */
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
    final MultiPhraseQuery.Builder query = newMultiPhraseQueryBuilder();
    query.setSlop(slop);

    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incAtt = stream.getAttribute(PositionIncrementAttribute.class);

    // Terms collected for the position currently being assembled.
    final List<Term> pending = new ArrayList<>();
    int pos = -1;

    stream.reset();
    while (stream.incrementToken()) {
        final int inc = incAtt.getPositionIncrement();

        // A positive increment closes the group gathered so far (if there is one).
        final boolean closesGroup = inc > 0 && !pending.isEmpty();
        if (closesGroup) {
            final Term[] group = pending.toArray(new Term[0]);
            if (enablePositionIncrements) {
                query.add(group, pos);
            } else {
                query.add(group);
            }
            pending.clear();
        }

        pos += inc;
        pending.add(new Term(field, bytesAtt.getBytesRef()));
    }

    // Flush whatever is left after the last token.
    final Term[] lastGroup = pending.toArray(new Term[0]);
    if (enablePositionIncrements) {
        query.add(lastGroup, pos);
    } else {
        query.add(lastGroup);
    }
    return query.build();
}
Also used : TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 19 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class QueryBuilder method analyzePhrase.

/**
   * Creates a simple phrase query from the cached tokenstream contents.
   * <p>
   * Each token becomes one term of the phrase. With {@code enablePositionIncrements}
   * set, a term's position advances by the token's reported position increment;
   * otherwise it advances by exactly one per token.
   *
   * @param field  field name used for every generated {@code Term}
   * @param stream token stream to consume; it is reset here before being read
   * @param slop   slop applied to the resulting query
   * @return the built phrase query
   * @throws IOException if reading from the token stream fails
   */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
    final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.setSlop(slop);

    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incAtt = stream.getAttribute(PositionIncrementAttribute.class);

    stream.reset();
    for (int pos = -1; stream.incrementToken(); ) {
        pos += enablePositionIncrements ? incAtt.getPositionIncrement() : 1;
        phrase.add(new Term(field, bytesAtt.getBytesRef()), pos);
    }
    return phrase.build();
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 20 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TokenSourcesTest method testPayloads.

// LUCENE-5294
// Indexes one document whose field comes from a canned token stream with term vectors
// (offsets, positions, payloads) enabled, then checks that the token stream rebuilt from
// the term vectors reproduces each token's text, position increment, payload and offsets.
public void testPayloads() throws Exception {
    final Directory directory = newDirectory();
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

    final FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectorPayloads(true);

    // Reset the shared offset before the expected tokens are created.
    curOffset = 0;
    final Token[] expected = { getToken("foxes"), getToken("can"), getToken("jump"), getToken("high") };

    final Document document = new Document();
    document.add(new Field("field", new CannedTokenStream(expected), vectorType));
    indexWriter.addDocument(document);

    final IndexReader reader = indexWriter.getReader();
    indexWriter.close();
    assertEquals(1, reader.numDocs());

    final TokenStream rebuilt = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);
    final CharTermAttribute term = rebuilt.getAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute increment = rebuilt.getAttribute(PositionIncrementAttribute.class);
    final OffsetAttribute offsets = rebuilt.getAttribute(OffsetAttribute.class);
    final PayloadAttribute payload = rebuilt.addAttribute(PayloadAttribute.class);

    rebuilt.reset();
    for (int i = 0; i < expected.length; i++) {
        final Token want = expected[i];
        assertTrue(rebuilt.incrementToken());
        assertEquals(want.toString(), term.toString());
        assertEquals(want.getPositionIncrement(), increment.getPositionIncrement());
        assertEquals(want.getPayload(), payload.getPayload());
        assertEquals(want.startOffset(), offsets.startOffset());
        assertEquals(want.endOffset(), offsets.endOffset());
    }
    // The rebuilt stream must be exhausted after the expected tokens.
    assertFalse(rebuilt.incrementToken());

    reader.close();
    directory.close();
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) IndexReader(org.apache.lucene.index.IndexReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 50; CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 33; TokenStream (org.apache.lucene.analysis.TokenStream): 28; OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 25; IOException (java.io.IOException): 14; ArrayList (java.util.ArrayList): 14; BytesRef (org.apache.lucene.util.BytesRef): 14; PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 11; TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 11; TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 11; StringReader (java.io.StringReader): 8; Term (org.apache.lucene.index.Term): 8; Token (org.apache.lucene.analysis.Token): 7; FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7; PositionLengthAttribute (org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute): 7; List (java.util.List): 6; LinkedList (java.util.LinkedList): 4; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 4; Document (org.apache.lucene.document.Document): 4; Iterator (java.util.Iterator): 3