Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class SpellingQueryConverter, method analyze:
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
    // overwriting any flags already set...
    token.setFlags(flagsAttValue);
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
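The method above follows the standard TokenStream consumption contract: acquire attributes, then reset(), incrementToken() until exhausted, end(), close(). A minimal, self-contained sketch of that same pattern, assuming a plain WhitespaceAnalyzer; the field name and sample text are placeholders, and a whitespace analyzer sets no payloads, so getPayload() is simply null here:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

public class PayloadDump {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new WhitespaceAnalyzer();
         TokenStream stream = analyzer.tokenStream("field", "some sample text")) {
      // Attributes must be acquired before reset().
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        // A plain whitespace analyzer produces no payloads, so this prints null.
        System.out.println(termAtt + " payload=" + payloadAtt.getPayload());
      }
      stream.end(); // finalize offset state before closing
    } // try-with-resources closes the stream and the analyzer
  }
}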
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class DelimitedPayloadTokenFilterTest, method testIntEncoding:
public void testIntEncoding() throws Exception {
  String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new IntegerEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  filter.reset();
  assertTermEquals("The", filter, termAtt, payAtt, null);
  assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
  assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
  assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
  assertTermEquals("jumped", filter, termAtt, payAtt, null);
  assertTermEquals("over", filter, termAtt, payAtt, null);
  assertTermEquals("the", filter, termAtt, payAtt, null);
  assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5));
  assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
  assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
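The inverse direction, reading the integer payloads back out with PayloadHelper.decodeInt, is worth seeing once outside the assertTermEquals helper. A hedged sketch using a plain WhitespaceTokenizer in place of the test's whitespaceMockTokenizer; the input string is shortened for illustration:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.IntegerEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

public class IntPayloadDemo {
  public static void main(String[] args) throws IOException {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("fox|3 jumped dogs|83"));
    try (DelimitedPayloadTokenFilter filter =
             new DelimitedPayloadTokenFilter(tokenizer, '|', new IntegerEncoder())) {
      CharTermAttribute termAtt = filter.addAttribute(CharTermAttribute.class);
      PayloadAttribute payAtt = filter.addAttribute(PayloadAttribute.class);
      filter.reset();
      while (filter.incrementToken()) {
        BytesRef payload = payAtt.getPayload();
        // decodeInt reads four big-endian bytes starting at the given offset;
        // tokens without a delimiter carry no payload at all.
        String value = payload == null
            ? "none"
            : String.valueOf(PayloadHelper.decodeInt(payload.bytes, payload.offset));
        System.out.println(termAtt + " -> " + value);
      }
      filter.end();
    } // closing the filter also closes the wrapped tokenizer
  }
}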
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class NumericPayloadTokenFilterTest, method test:
public void test() throws IOException {
  String test = "The quick red fox jumped over the lazy brown dogs";
  final MockTokenizer input = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  input.setReader(new StringReader(test));
  NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(input), 3, "D");
  boolean seenDogs = false;
  CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
  nptf.reset();
  while (nptf.incrementToken()) {
    if (termAtt.toString().equals("dogs")) {
      seenDogs = true;
      assertTrue(typeAtt.type() + " is not equal to \"D\"", typeAtt.type().equals("D"));
      assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      // safe here to just use the bytes; otherwise we should use offset and length
      byte[] bytes = payloadAtt.getPayload().bytes;
      assertTrue(bytes.length + " does not equal: " + payloadAtt.getPayload().length, bytes.length == payloadAtt.getPayload().length);
      assertTrue(payloadAtt.getPayload().offset + " does not equal: " + 0, payloadAtt.getPayload().offset == 0);
      float pay = PayloadHelper.decodeFloat(bytes);
      assertTrue(pay + " does not equal: " + 3, pay == 3);
    } else {
      assertTrue(typeAtt.type() + " is not equal to \"word\"", typeAtt.type().equals("word"));
    }
  }
  assertTrue("should have seen the token \"dogs\"", seenDogs);
}
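The test leans on the float round trip provided by PayloadHelper: NumericPayloadTokenFilter stamps tokens of the given type with an encoded float, and decodeFloat recovers it. Shown in isolation as a small sketch; this uses only the stock PayloadHelper API, nothing project-specific:

import org.apache.lucene.analysis.payloads.PayloadHelper;

public class FloatPayloadRoundTrip {
  public static void main(String[] args) {
    byte[] encoded = PayloadHelper.encodeFloat(3.0f); // four bytes: the big-endian IEEE 754 bits
    float decoded = PayloadHelper.decodeFloat(encoded);
    System.out.println(decoded); // prints 3.0
  }
}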
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TestDelimitedPayloadTokenFilterFactory, method testDelim:
public void testDelim() throws Exception {
  Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(stream);
  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
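When the factory is built by hand rather than through the tokenFilterFactory test helper, one wrinkle matters: DelimitedPayloadTokenFilterFactory is ResourceLoaderAware, and its "encoder" argument is only resolved into an actual encoder when inform() is called. A hedged sketch of the manual wiring, assuming a plain WhitespaceTokenizer; the argument map mirrors the args passed above:

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;

public class FactoryDemo {
  public static void main(String[] args) throws IOException {
    Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put("encoder", "float");
    factoryArgs.put("delimiter", "*");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory(factoryArgs);
    factory.inform(new ClasspathResourceLoader(FactoryDemo.class)); // resolves "float" to a FloatEncoder

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the*0.1 quick*0.1 red*0.1"));
    TokenStream stream = factory.create(tokenizer);
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    PayloadAttribute payAttr = stream.addAttribute(PayloadAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(termAtt + " -> " + PayloadHelper.decodeFloat(payAttr.getPayload().bytes));
    }
    stream.end();
    stream.close();
  }
}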
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TestDelimitedPayloadTokenFilterFactory, method testEncoder:
public void testEncoder() throws Exception {
  Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer) stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
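Aside from the delimiter, this is identical to testDelim above: when no "delimiter" argument is supplied, the factory falls back to DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, the '|' character, which is why this input uses '|' where the previous test used '*'.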