Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class SimpleQueryConverter, method convert.
@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> result = new HashSet<>();
  WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
  try (TokenStream ts = analyzer.tokenStream("", origQuery)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      Token tok = new Token();
      tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      tok.setFlags(flagsAtt.getFlags());
      tok.setPayload(payloadAtt.getPayload());
      tok.setPositionIncrement(posIncAtt.getPositionIncrement());
      tok.setType(typeAtt.type());
      result.add(tok);
    }
    ts.end();
    return result;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
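Because all six attributes are registered before reset() and copied inside the loop, each resulting Token is a detached snapshot of the stream's state, payload included. A minimal usage sketch (assuming Solr's SimpleQueryConverter is on the classpath; the query string is illustrative):

SimpleQueryConverter converter = new SimpleQueryConverter();
// WhitespaceAnalyzer splits only on whitespace, so this yields two tokens.
Collection<Token> tokens = converter.convert("hello world");
for (Token tok : tokens) {
  // Offsets, flags, payload, and position increment were all copied from the stream.
  System.out.println(tok + " [" + tok.startOffset() + "," + tok.endOffset() + ")");
}

Note that result is a HashSet, so token order is not preserved; a caller that needs positional order would have to sort, e.g. by startOffset().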
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TypeAsPayloadTokenFilterTest, method test.
public void test() throws IOException {
  String test = "The quick red fox jumped over the lazy brown dogs";
  TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(whitespaceMockTokenizer(test)));
  int count = 0;
  CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
  nptf.reset();
  while (nptf.incrementToken()) {
    assertTrue(typeAtt.type() + " does not equal the uppercased first character of the term", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
    assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
    String type = payloadAtt.getPayload().utf8ToString();
    assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()));
    count++;
  }
  assertTrue(count + " does not equal: " + 10, count == 10);
}
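TypeAsPayloadTokenFilter copies each token's type string into its payload as UTF-8 bytes, which is what the utf8ToString() round trip above verifies. A hedged sketch of reading those payloads back out of any such filtered stream (the tokenizer variable is hypothetical):

TokenStream stream = new TypeAsPayloadTokenFilter(tokenizer); // tokenizer: any Tokenizer, illustrative here
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
stream.reset();
while (stream.incrementToken()) {
  BytesRef payload = payloadAtt.getPayload();
  // The payload, when present, is the UTF-8 encoding of the type string.
  if (payload != null) {
    assert payload.utf8ToString().equals(typeAtt.type());
  }
}
stream.end();
stream.close();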
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TokenOffsetPayloadTokenFilterTest, method test.
public void test() throws IOException {
  String test = "The quick red fox jumped over the lazy brown dogs";
  TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(whitespaceMockTokenizer(test));
  int count = 0;
  PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
  OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);
  nptf.reset();
  while (nptf.incrementToken()) {
    BytesRef pay = payloadAtt.getPayload();
    assertTrue("pay is null and it shouldn't be", pay != null);
    byte[] data = pay.bytes;
    int start = PayloadHelper.decodeInt(data, 0);
    assertTrue(start + " does not equal: " + offsetAtt.startOffset(), start == offsetAtt.startOffset());
    int end = PayloadHelper.decodeInt(data, 4);
    assertTrue(end + " does not equal: " + offsetAtt.endOffset(), end == offsetAtt.endOffset());
    count++;
  }
  assertTrue(count + " does not equal: " + 10, count == 10);
}
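TokenOffsetPayloadTokenFilter packs each token's start and end offsets into an 8-byte payload, one 4-byte big-endian int at byte 0 and another at byte 4, which is why the test decodes at those two positions. A minimal round-trip sketch of that encoding using PayloadHelper directly (the offset values are illustrative):

byte[] data = new byte[8];
PayloadHelper.encodeInt(17, data, 0); // startOffset in bytes 0-3
PayloadHelper.encodeInt(23, data, 4); // endOffset in bytes 4-7
// decodeInt reads four big-endian bytes starting at the given array offset
assertEquals(17, PayloadHelper.decodeInt(data, 0));
assertEquals(23, PayloadHelper.decodeInt(data, 4));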
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TestNGramFilters, method testEdgeNGramFilterPayload.
/**
 * Test EdgeNGramFilterFactory on tokens with payloads
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);
  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData.bytes);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
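The factory chain above is equivalent to composing the filters directly: the DelimitedPayload filter splits each token on '|', keeping the left side as the term and float-encoding the right side as the payload, and the edge n-gram filter then emits the grams "t" and "te", each inheriting the parent token's payload. A hedged sketch of the direct construction (constructor shapes vary somewhat across Lucene versions):

Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("test|0.1"));
TokenStream stream = new DelimitedPayloadTokenFilter(tokenizer, '|', new FloatEncoder());
stream = new EdgeNGramTokenFilter(stream, 1, 2); // minGram=1, maxGram=2
PayloadAttribute payAttr = stream.addAttribute(PayloadAttribute.class);
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
  // Each gram should carry the parent token's payload: 0.1f
  System.out.println(termAtt + " -> " + PayloadHelper.decodeFloat(payAttr.getPayload().bytes));
}
stream.end();
stream.close();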
Use of org.apache.lucene.analysis.tokenattributes.PayloadAttribute in project lucene-solr by apache.
The class TokenSourcesTest, method testRandomizedRoundTrip.
@Repeat(iterations = 10)
//@Seed("947083AB20AB2D4F")
public void testRandomizedRoundTrip() throws Exception {
  final int distinct = TestUtil.nextInt(random(), 1, 10);
  String[] terms = new String[distinct];
  BytesRef[] termBytes = new BytesRef[distinct];
  for (int i = 0; i < distinct; ++i) {
    terms[i] = TestUtil.randomRealisticUnicodeString(random());
    termBytes[i] = new BytesRef(terms[i]);
  }
  final BaseTermVectorsFormatTestCase.RandomTokenStream rTokenStream = new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes);
  //check to see if the token streams might have a non-deterministic testable result
  final boolean storeTermVectorPositions = random().nextBoolean();
  final int[] startOffsets = rTokenStream.getStartOffsets();
  final int[] positionsIncrements = rTokenStream.getPositionsIncrements();
  for (int i = 1; i < positionsIncrements.length; i++) {
    if (storeTermVectorPositions && positionsIncrements[i] != 0) {
      continue;
    }
    //TODO: should RandomTokenStream ensure endOffsets for tokens at the same position and startOffset are greater
    // than the previous token's endOffset? That would increase the testable possibilities.
    if (startOffsets[i] == startOffsets[i - 1]) {
      if (VERBOSE)
        System.out.println("Skipping test because can't easily validate random token-stream is correct.");
      return;
    }
  }
  //sanity check the random token stream itself
  assertTokenStreamContents(rTokenStream, rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(), rTokenStream.getPositionsIncrements());
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(storeTermVectorPositions);
  //payloads require positions; it will throw an error otherwise
  myFieldType.setStoreTermVectorPayloads(storeTermVectorPositions && random().nextBoolean());
  Document doc = new Document();
  doc.add(new Field("field", rTokenStream, myFieldType));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());
  TokenStream vectorTokenStream = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);
  //sometimes check payloads
  PayloadAttribute payloadAttribute = null;
  if (myFieldType.storeTermVectorPayloads() && usually()) {
    payloadAttribute = vectorTokenStream.addAttribute(PayloadAttribute.class);
  }
  assertTokenStreamContents(vectorTokenStream, rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(), myFieldType.storeTermVectorPositions() ? rTokenStream.getPositionsIncrements() : null);
  //test payloads
  if (payloadAttribute != null) {
    vectorTokenStream.reset();
    for (int i = 0; vectorTokenStream.incrementToken(); i++) {
      assertEquals(rTokenStream.getPayloads()[i], payloadAttribute.getPayload());
    }
  }
  reader.close();
  dir.close();
}
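The round trip here depends on term vectors preserving payloads, which in turn requires positions to be stored. A hedged sketch of reading payloads straight off a term vector with the postings API, without going through TokenSources (variable names are illustrative; assumes `reader` is an open IndexReader whose doc 0 indexed "field" with term vector positions and payloads):

Terms vector = reader.getTermVectors(0).terms("field");
TermsEnum termsEnum = vector.iterator();
PostingsEnum postings = null;
while (termsEnum.next() != null) {
  // Request payloads explicitly via the flags argument.
  postings = termsEnum.postings(postings, PostingsEnum.PAYLOADS);
  while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    for (int i = 0; i < postings.freq(); i++) {
      postings.nextPosition();
      BytesRef payload = postings.getPayload(); // null when no payload was indexed
      if (payload != null) {
        System.out.println(termsEnum.term().utf8ToString() + " -> " + payload);
      }
    }
  }
}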