Usage example of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in the Apache lucene-solr project: class QueryBuilder, method analyzePhrase.
/**
 * Builds a plain {@link PhraseQuery} over the terms of the cached token stream.
 *
 * @param field  field name applied to every term of the phrase
 * @param stream token stream whose terms become consecutive phrase positions
 * @param slop   slop factor set on the resulting phrase query
 * @return the assembled phrase query
 * @throws IOException if consuming the token stream fails
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder phraseBuilder = new PhraseQuery.Builder();
  phraseBuilder.setSlop(slop);
  TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute incrAttr = stream.getAttribute(PositionIncrementAttribute.class);
  stream.reset();
  int pos = -1;
  while (stream.incrementToken()) {
    // Honor position gaps (e.g. removed stopwords) only when increments are enabled;
    // otherwise every token advances by exactly one position.
    pos += enablePositionIncrements ? incrAttr.getPositionIncrement() : 1;
    phraseBuilder.add(new Term(field, termAttr.getBytesRef()), pos);
  }
  return phraseBuilder.build();
}
Usage example of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in the Apache lucene-solr project: class TestTypeTokenFilter, method testPositons.
/**
 * Consumes the given type filter and checks that every surviving token carries a
 * position increment of 3.
 *
 * NOTE(review): the assertion message mentions "otherwise 1", but the code always
 * expects 3 — presumably the callers only pass filters with position increments
 * enabled; confirm against the call sites.
 *
 * @param stpf the type filter under test, not yet reset
 * @throws IOException if the stream cannot be consumed
 */
private void testPositons(TypeTokenFilter stpf) throws IOException {
  TypeAttribute typeAtt = stpf.getAttribute(TypeAttribute.class);
  CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
  stpf.reset();
  while (stpf.incrementToken()) {
    log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncrAtt.getPositionIncrement());
    // Fixed: JUnit's assertEquals takes (message, expected, actual); the original
    // passed the actual value in the expected slot, producing a misleading failure message.
    assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
        3, posIncrAtt.getPositionIncrement());
  }
  stpf.end();
  stpf.close();
}
Usage example of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in the Apache lucene-solr project: class TestTeeSinkTokenFilter, method performance.
/**
 * Not an explicit test, just useful to print out some info on performance.
 *
 * For several token counts it first verifies that a modulo-filtered sink stream
 * produces the same tokens as an equivalently filtered direct stream, then times
 * two strategies over 20 iterations each: (a) analyzing the text twice with two
 * independent streams, and (b) analyzing once through a TeeSinkTokenFilter plus
 * its sink. It asserts both strategies accumulate the same position-increment sum.
 */
@SuppressWarnings("resource")
public void performance() throws Exception {
  int[] tokCount = { 100, 500, 1000, 2000, 5000, 10000 };
  int[] modCounts = { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
  for (int k = 0; k < tokCount.length; k++) {
    StringBuilder buffer = new StringBuilder();
    System.out.println("-----Tokens: " + tokCount[k] + "-----");
    // Input text: "ONE TWO THREE ..." — English number words, upper-cased.
    for (int i = 0; i < tokCount[k]; i++) {
      buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
    }
    //make sure we produce the same tokens
    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
    TokenStream sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), 100);
    teeStream.consumeAllTokens();
    TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100);
    CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
    CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
    for (int i = 0; stream.incrementToken(); i++) {
      assertTrue(sink.incrementToken());
      // Fixed: dropped the redundant "== true" boolean comparison.
      assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok));
    }
    //simulate two fields, each being analyzed once, for 20 documents
    for (int j = 0; j < modCounts.length; j++) {
      int tfPos = 0;
      long start = System.currentTimeMillis();
      for (int i = 0; i < 20; i++) {
        stream = new StandardFilter(standardTokenizer(buffer));
        PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
        while (stream.incrementToken()) {
          tfPos += posIncrAtt.getPositionIncrement();
        }
        stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), modCounts[j]);
        posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
        while (stream.incrementToken()) {
          tfPos += posIncrAtt.getPositionIncrement();
        }
      }
      long finish = System.currentTimeMillis();
      System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
      int sinkPos = 0;
      //simulate one field with one sink
      start = System.currentTimeMillis();
      for (int i = 0; i < 20; i++) {
        teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
        sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), modCounts[j]);
        PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
        while (teeStream.incrementToken()) {
          sinkPos += posIncrAtt.getPositionIncrement();
        }
        //System.out.println("Modulo--------");
        posIncrAtt = sink.getAttribute(PositionIncrementAttribute.class);
        while (sink.incrementToken()) {
          sinkPos += posIncrAtt.getPositionIncrement();
        }
      }
      finish = System.currentTimeMillis();
      System.out.println("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
      // Both strategies must have visited the same positions.
      assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
    }
    System.out.println("- End Tokens: " + tokCount[k] + "-----");
  }
}
Usage example of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in the Apache lucene-solr project: class TestSnowball, method testFilterTokens.
/**
 * Verifies that SnowballFilter stems the term produced by TestTokenStream to
 * "accent" while leaving every other attribute (offsets, type, position
 * increment, flags, payload) untouched.
 *
 * NOTE(review): the expected attribute values (offsets 2/7, type "wrd",
 * increment 3, flags 77, payload bytes 0..3) are presumably those emitted by
 * TestTokenStream — confirm against its definition.
 */
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
  // Fixed: the return value was ignored; if no token is produced, the
  // assertions below would inspect stale attribute state. Assert it.
  assertTrue(filter.incrementToken());
  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[] { 0, 1, 2, 3 }), payloadAtt.getPayload());
}
Usage example of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute in the Apache lucene-solr project: class TestIndexWriter, method testNegativePositions.
// LUCENE-1255: IndexWriter must reject a document whose token stream yields a
// negative position (first token with increment 0 starts from -1).
public void testNegativePositions() throws Throwable {
  final TokenStream stream = new TokenStream() {
    final CharTermAttribute term = addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
    final Iterator<String> words = Arrays.asList("a", "b", "c").iterator();
    boolean emittedFirst = false;
    @Override
    public boolean incrementToken() {
      if (!words.hasNext()) {
        return false;
      }
      clearAttributes();
      term.append(words.next());
      // Increment 0 on the very first token; all later tokens advance by 1.
      posIncr.setPositionIncrement(emittedFirst ? 1 : 0);
      emittedFirst = true;
      return true;
    }
  };
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
  Document document = new Document();
  document.add(new TextField("field", stream));
  expectThrows(IllegalArgumentException.class, () -> writer.addDocument(document));
  writer.close();
  directory.close();
}
Aggregations