Use of org.apache.lucene.analysis.Token in the lucene-solr project (Apache).
Class ShingleFilterTest, method testTrailingHole1.
public void testTrailingHole1() throws IOException {
    // Analyze "wizard of": "of" is removed as a stopword, which leaves a
    // trailing hole at the end of the stream (final offset 9, one extra position).
    Token[] tokens = new Token[] { createToken("wizard", 0, 6) };
    CannedTokenStream input = new CannedTokenStream(1, 9, tokens);
    ShingleFilter shingles = new ShingleFilter(input, 2, 2);
    // The trailing hole is rendered as the filler token "_" in the bigram.
    assertTokenStreamContents(shingles,
        new String[] { "wizard", "wizard _" },
        new int[] { 0, 0 },
        new int[] { 6, 9 },
        new int[] { 1, 0 },
        9);
}
Use of org.apache.lucene.analysis.Token in the lucene-solr project (Apache).
Class SpellCheckComponent, method addOriginalTermsToResponse.
/**
 * Adds the original (pre-correction) query terms to the response under the
 * key {@code "originalTerms"}, each rendered via {@link Token#toString()}.
 *
 * @param response      the response section the term list is appended to
 * @param originalTerms the analyzed tokens of the original query
 */
private void addOriginalTermsToResponse(NamedList response, Collection<Token> originalTerms) {
    // Presize to the known element count and use the diamond operator,
    // matching the diamond style used elsewhere in this codebase.
    List<String> originalTermStr = new ArrayList<>(originalTerms.size());
    for (Token t : originalTerms) {
        originalTermStr.add(t.toString());
    }
    response.add("originalTerms", originalTermStr);
}
Use of org.apache.lucene.analysis.Token in the lucene-solr project (Apache).
Class SynonymTokenizer, method getTS2.
protected TokenStream getTS2() {
    // Canned stream for "Hi-Speed10 foo". Note "speed" has posInc 0, so it
    // stacks on the same position as the preceding "hispeed" token.
    return new TokenStream() {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        private final List<Token> tokens = new ArrayList<>();
        private Iterator<Token> cursor;

        {
            add("hi", 0, 2, 1);
            add("hispeed", 0, 8, 1);
            add("speed", 3, 8, 0);
            add("10", 8, 10, 1);
            add("foo", 11, 14, 1);
            cursor = tokens.iterator();
        }

        // Builds one token with the given offsets/position increment and appends it.
        private void add(String text, int start, int end, int posInc) {
            Token t = createToken(text, start, end);
            t.setPositionIncrement(posInc);
            tokens.add(t);
        }

        @Override
        public boolean incrementToken() {
            if (!cursor.hasNext()) {
                return false;
            }
            Token next = cursor.next();
            clearAttributes();
            termAtt.setEmpty().append(next);
            posIncrAtt.setPositionIncrement(next.getPositionIncrement());
            offsetAtt.setOffset(next.startOffset(), next.endOffset());
            return true;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            cursor = tokens.iterator();
        }
    };
}
Use of org.apache.lucene.analysis.Token in the lucene-solr project (Apache).
Class SynonymTokenizer, method getTS2a.
// same token-stream as above, but the bigger token comes first this time
protected TokenStream getTS2a() {
    // Canned stream for "Hi-Speed10 foo". Same tokens as getTS2, but here the
    // bigger token "hispeed" is emitted first and "hi" stacks on it (posInc 0).
    return new TokenStream() {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
        private final List<Token> tokens = new ArrayList<>();
        private Iterator<Token> cursor;

        {
            add("hispeed", 0, 8, 1);
            add("hi", 0, 2, 0);
            add("speed", 3, 8, 1);
            add("10", 8, 10, 1);
            add("foo", 11, 14, 1);
            cursor = tokens.iterator();
        }

        // Builds one token with the given offsets/position increment and appends it.
        private void add(String text, int start, int end, int posInc) {
            Token t = createToken(text, start, end);
            t.setPositionIncrement(posInc);
            tokens.add(t);
        }

        @Override
        public boolean incrementToken() {
            if (!cursor.hasNext()) {
                return false;
            }
            Token next = cursor.next();
            clearAttributes();
            termAtt.setEmpty().append(next);
            posIncrAtt.setPositionIncrement(next.getPositionIncrement());
            offsetAtt.setOffset(next.startOffset(), next.endOffset());
            return true;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            cursor = tokens.iterator();
        }
    };
}
Use of org.apache.lucene.analysis.Token in the lucene-solr project (Apache).
Class SynonymTokenizer, method testNotRewriteMultiTermQuery.
public void testNotRewriteMultiTermQuery() throws IOException {
    // Range query on field "bar" -- not the field we ultimately extract for.
    final MultiTermQuery rangeQuery =
        new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true);
    WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() {
        @Override
        protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
            // The query must reach extract() exactly as built, i.e. unrewritten.
            assertEquals(rangeQuery, query);
            super.extract(query, boost, terms);
        }
    };
    extractor.setExpandMultiTermQuery(true);
    extractor.setMaxDocCharsToAnalyze(51200);
    CannedTokenStream stream = new CannedTokenStream(new Token("aa", 0, 2), new Token("bb", 2, 4));
    extractor.getWeightedSpanTerms(rangeQuery, 3, stream, "foo"); // extract for field "foo"
}
Aggregations