Search in sources:

Example 21 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class QueryBuilder method analyzePhrase.

/**
 * Builds a simple phrase query on {@code field} from the tokens produced by {@code stream}.
 *
 * <p>The stream is reset and then consumed token by token. Each term is added to the
 * phrase at a running position that starts at -1 and advances either by the token's
 * position increment (when {@code enablePositionIncrements} is set) or by exactly one.
 * The stream is neither ended nor closed by this method.
 *
 * @param field  name of the field the phrase terms are created for
 * @param stream token stream supplying the terms; it must expose
 *               {@link TermToBytesRefAttribute} and {@link PositionIncrementAttribute}
 * @param slop   slop passed to the phrase query builder
 * @return the phrase query built from all tokens of the stream
 * @throws IOException if resetting or advancing the stream fails
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incrementAtt = stream.getAttribute(PositionIncrementAttribute.class);

    final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.setSlop(slop);

    // Start one before the first slot so that an increment of 1 lands on position 0.
    int pos = -1;
    stream.reset();
    while (stream.incrementToken()) {
        // The increment attribute is consulted only when position increments are honoured.
        pos += enablePositionIncrements ? incrementAtt.getPositionIncrement() : 1;
        phrase.add(new Term(field, bytesAtt.getBytesRef()), pos);
    }
    return phrase.build();
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 22 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TestTypeTokenFilter method testPositons.

/**
 * Consumes every token of {@code stpf}, logs its term, type and position increment,
 * and asserts that each token's position increment is 3.
 *
 * @param stpf the filter under test; it is reset, fully consumed, ended and closed here
 * @throws IOException if the underlying stream fails
 */
private void testPositons(TypeTokenFilter stpf) throws IOException {
    TypeAttribute typeAtt = stpf.getAttribute(TypeAttribute.class);
    CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    while (stpf.incrementToken()) {
        // Read the increment once so the logged value and the asserted value are the same.
        final int posIncr = posIncrAtt.getPositionIncrement();
        log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncr);
        // JUnit's signature is (message, expected, actual); the literal 3 is the expectation.
        assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1", 3, posIncr);
    }
    stpf.end();
    stpf.close();
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 23 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TestTeeSinkTokenFilter method performance.

/**
 * Not an explicit test; prints timing information comparing two analysis strategies.
 *
 * <p>For each token count it first checks that a tee sink and a directly built stream
 * yield the same terms. Then, for each modulo value, it times 20 rounds of analysing
 * the text twice with independent streams against 20 rounds of analysing it once
 * through a {@link TeeSinkTokenFilter} plus its sink, and asserts that both strategies
 * accumulate the same total of position increments.
 */
@SuppressWarnings("resource")
public void performance() throws Exception {
    final int[] tokCount = { 100, 500, 1000, 2000, 5000, 10000 };
    final int[] modCounts = { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (final int tokens : tokCount) {
        System.out.println("-----Tokens: " + tokens + "-----");
        final StringBuilder text = new StringBuilder();
        for (int n = 0; n < tokens; n++) {
            text.append(English.intToEnglish(n).toUpperCase(Locale.ROOT)).append(' ');
        }

        // Sanity check: the tee's sink and a directly built stream must produce the same tokens.
        final TeeSinkTokenFilter checkTee = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(text)));
        final TokenStream checkSink = new ModuloTokenFilter(checkTee.newSinkTokenStream(), 100);
        checkTee.consumeAllTokens();
        final TokenStream direct = new ModuloTokenFilter(new StandardFilter(standardTokenizer(text)), 100);
        final CharTermAttribute directTerm = direct.addAttribute(CharTermAttribute.class);
        final CharTermAttribute sinkTerm = checkSink.addAttribute(CharTermAttribute.class);
        int tokenIndex = 0;
        while (direct.incrementToken()) {
            assertTrue(checkSink.incrementToken());
            assertTrue(directTerm + " is not equal to " + sinkTerm + " at token: " + tokenIndex, directTerm.equals(sinkTerm));
            tokenIndex++;
        }

        for (final int modCount : modCounts) {
            // Baseline: two fields, each analysed once with its own stream, for 20 documents.
            int tfPos = 0;
            long start = System.currentTimeMillis();
            for (int doc = 0; doc < 20; doc++) {
                tfPos += sumPositionIncrements(new StandardFilter(standardTokenizer(text)));
                tfPos += sumPositionIncrements(new ModuloTokenFilter(new StandardFilter(standardTokenizer(text)), modCount));
            }
            long finish = System.currentTimeMillis();
            System.out.println("ModCount: " + modCount + " Two fields took " + (finish - start) + " ms");

            // Alternative: one field analysed through a tee, with one sink fed from it.
            int sinkPos = 0;
            start = System.currentTimeMillis();
            for (int doc = 0; doc < 20; doc++) {
                final TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(text)));
                // The sink is created before the tee is drained, as in the baseline ordering.
                final TokenStream teeSink = new ModuloTokenFilter(tee.newSinkTokenStream(), modCount);
                sinkPos += sumPositionIncrements(tee);
                sinkPos += sumPositionIncrements(teeSink);
            }
            finish = System.currentTimeMillis();
            System.out.println("ModCount: " + modCount + " Tee fields took " + (finish - start) + " ms");
            assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
        }
        System.out.println("- End Tokens: " + tokens + "-----");
    }
}

/**
 * Drains {@code ts} and returns the sum of the position increments of all its tokens.
 */
private static int sumPositionIncrements(TokenStream ts) throws Exception {
    final PositionIncrementAttribute posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
    int total = 0;
    while (ts.incrementToken()) {
        total += posIncrAtt.getPositionIncrement();
    }
    return total;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) StandardFilter(org.apache.lucene.analysis.standard.StandardFilter) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 24 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TestSnowball method testFilterTokens.

/**
 * Checks the first token emitted by a {@code SnowballFilter} ("English") wrapped
 * around a {@code TestTokenStream}: the term must read "accent", and the offset,
 * type, position-increment, flags and payload attributes must hold the expected values.
 */
public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
    // Fail here, with a clear message, if no token is produced, instead of letting
    // the attribute checks below run against whatever state the attributes hold.
    assertTrue("filter should produce at least one token", filter.incrementToken());
    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new BytesRef(new byte[] { 0, 1, 2, 3 }), payloadAtt.getPayload());
}
Also used : PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) FlagsAttribute(org.apache.lucene.analysis.tokenattributes.FlagsAttribute) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) BytesRef(org.apache.lucene.util.BytesRef) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 25 with PositionIncrementAttribute

use of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in project lucene-solr by apache.

the class TestIndexWriter method testNegativePositions.

// LUCENE-1255
/**
 * Indexes a field whose token stream gives its first token a position increment of 0
 * and expects {@code IndexWriter.addDocument} to reject the document with an
 * {@link IllegalArgumentException}.
 */
public void testNegativePositions() throws Throwable {
    final TokenStream tokens = new TokenStream() {

        final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

        final Iterator<String> terms = Arrays.asList("a", "b", "c").iterator();

        boolean first = true;

        @Override
        public boolean incrementToken() {
            if (!terms.hasNext()) {
                return false;
            }
            clearAttributes();
            termAtt.append(terms.next());
            // Only the very first token gets an increment of 0; all later ones advance by 1.
            posIncrAtt.setPositionIncrement(first ? 0 : 1);
            first = false;
            return true;
        }
    };
    // try-with-resources closes the writer first and then the directory (reverse
    // declaration order), and does so even when the expectation below fails.
    try (Directory dir = newDirectory();
         IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        Document doc = new Document();
        doc.add(new TextField("field", tokens));
        expectThrows(IllegalArgumentException.class, () -> {
            w.addDocument(doc);
        });
    }
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Iterator(java.util.Iterator) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory)

Aggregations

PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 51; CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 34; TokenStream (org.apache.lucene.analysis.TokenStream): 29; OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 26; IOException (java.io.IOException): 15; ArrayList (java.util.ArrayList): 14; BytesRef (org.apache.lucene.util.BytesRef): 14; PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute): 11; TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 11; TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 11; StringReader (java.io.StringReader): 9; Term (org.apache.lucene.index.Term): 8; Token (org.apache.lucene.analysis.Token): 7; FlagsAttribute (org.apache.lucene.analysis.tokenattributes.FlagsAttribute): 7; PositionLengthAttribute (org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute): 7; List (java.util.List): 6; LinkedList (java.util.LinkedList): 4; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 4; Document (org.apache.lucene.document.Document): 4; Iterator (java.util.Iterator): 3