Use of org.apache.lucene.analysis.TokenStream in the project lucene-solr by Apache.
From the class TestTermAutomatonQuery, the method testAnyFromTokenStream:
public void testAnyFromTokenStream() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // Index three documents that should match and a fourth that should not.
  String[] values = {
      "here comes the sun",
      "here comes the moon",
      "here comes sun",
      // Should not match:
      "here comes the other sun"
  };
  for (String value : values) {
    Document doc = new Document();
    doc.add(newTextField("field", value, Field.Store.NO));
    writer.addDocument(doc);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // Canned token graph: "comes" spanning either one position or two (the
  // two-position path goes through the "*" token), then "sun" or "moon".
  TokenStream tokens = new CannedTokenStream(new Token[] {
      token("comes", 1, 1),
      token("comes", 0, 2),
      token("*", 1, 1),
      token("sun", 1, 1),
      token("moon", 0, 1)});
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", tokens);

  assertEquals(3, searcher.search(query, 1).totalHits);

  writer.close();
  reader.close();
  dir.close();
}
Use of org.apache.lucene.analysis.TokenStream in the project lucene-solr by Apache.
From the class TestTermAutomatonQuery, the method testTermDoesNotExist:
public void testTermDoesNotExist() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // Single indexed document; the queried term "a" never occurs in it.
  Document doc = new Document();
  doc.add(newTextField("field", "x y z", Field.Store.NO));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  TokenStream tokens = new CannedTokenStream(new Token[] {token("a", 1, 1)});
  TermAutomatonQuery query = new TokenStreamToTermAutomatonQuery().toQuery("field", tokens);

  // No document contains "a", so nothing may match.
  assertEquals(0, searcher.search(query, 1).totalHits);

  writer.close();
  reader.close();
  dir.close();
}
Use of org.apache.lucene.analysis.TokenStream in the project lucene-solr by Apache.
From the class ICUCollationField, the method getCollationKey:
/**
 * Produces the collation key for {@code text} by running it through the
 * field's analyzer rather than calling the collator directly. ICU collators
 * are not thread safe, so routing through the analyzer (which keeps a
 * thread-local clone in its reused TokenStream) keeps this method simple.
 */
private BytesRef getCollationKey(String field, String text) {
  try (TokenStream source = analyzer.tokenStream(field, text)) {
    source.reset();
    TermToBytesRefAttribute bytesAtt = source.getAttribute(TermToBytesRefAttribute.class);
    // We control the analyzer here: it must emit exactly one term.
    if (!source.incrementToken()) {
      throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
    }
    // Deep copy: the attribute's BytesRef is reused by the stream.
    BytesRef key = BytesRef.deepCopyOf(bytesAtt.getBytesRef());
    assert !source.incrementToken();
    source.end();
    return key;
  } catch (IOException e) {
    throw new RuntimeException("Unable to analyze text: " + text, e);
  }
}
Use of org.apache.lucene.analysis.TokenStream in the project lucene-solr by Apache.
From the class SpellingQueryConverter, the method analyze:
/**
 * Analyzes {@code text} with this converter's analyzer and appends one
 * {@link Token} per emitted term to {@code result}.
 *
 * @param result        collection the new tokens are added to
 * @param text          the raw text to analyze
 * @param offset        base offset added to each token's start/end offsets
 * @param flagsAttValue flags value stamped onto every produced token,
 *                      overwriting any flags set by the analysis chain
 * @throws IOException if the analyzer's TokenStream fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  // try-with-resources: the previous version leaked the TokenStream when
  // reset()/incrementToken() threw before reaching the explicit close().
  try (TokenStream stream = analyzer.tokenStream("", text)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      // Shift the stream's offsets by the caller-supplied base offset.
      token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
      // Overwriting any flags already set...
      token.setFlags(flagsAttValue);
      token.setType(typeAtt.type());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    stream.end();
  }
}
Use of org.apache.lucene.analysis.TokenStream in the project lucene-solr by Apache.
From the class TestGraphTokenStreamFiniteStrings, the method testEmpty:
public void testEmpty() throws Exception {
  // An empty token stream must yield no finite strings and no articulation points.
  GraphTokenStreamFiniteStrings graph =
      new GraphTokenStreamFiniteStrings(new CannedTokenStream());
  Iterator<TokenStream> strings = graph.getFiniteStrings();
  assertFalse(strings.hasNext());
  assertArrayEquals(new int[0], graph.articulationPoints());
}
Aggregations