use of org.apache.lucene.analysis.tokenattributes.CharTermAttribute in project elasticsearch by elastic.
the class CommonTermsQueryBuilder method parseQueryString.
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field, Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
// Logic similar to QueryParser#getFieldQuery
try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
source.reset();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
BytesRefBuilder builder = new BytesRefBuilder();
while (source.incrementToken()) {
// UTF-8
builder.copyChars(termAtt);
query.add(new Term(field, builder.toBytesRef()));
}
}
query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
return query;
}
use of org.apache.lucene.analysis.tokenattributes.CharTermAttribute in project elasticsearch by elastic.
the class TransportAnalyzeAction method simpleAnalyze.
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
int lastPosition = -1;
int lastOffset = 0;
for (String text : request.text()) {
try (TokenStream stream = analyzer.tokenStream(field, text)) {
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
TypeAttribute type = stream.addAttribute(TypeAttribute.class);
PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
while (stream.incrementToken()) {
int increment = posIncr.getPositionIncrement();
if (increment > 0) {
lastPosition = lastPosition + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), null));
}
stream.end();
lastOffset += offset.endOffset();
lastPosition += posIncr.getPositionIncrement();
lastPosition += analyzer.getPositionIncrementGap(field);
lastOffset += analyzer.getOffsetGap(field);
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
}
}
return tokens;
}
use of org.apache.lucene.analysis.tokenattributes.CharTermAttribute in project elasticsearch by elastic.
the class SimpleIcuCollationTokenFilterTests method assertCollation.
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
stream1.reset();
stream2.reset();
assertThat(stream1.incrementToken(), equalTo(true));
assertThat(stream2.incrementToken(), equalTo(true));
assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
assertThat(stream1.incrementToken(), equalTo(false));
assertThat(stream2.incrementToken(), equalTo(false));
stream1.end();
stream2.end();
stream1.close();
stream2.close();
}
use of org.apache.lucene.analysis.tokenattributes.CharTermAttribute in project elasticsearch by elastic.
the class SimpleUkrainianAnalyzerTests method testAnalyzer.
private static void testAnalyzer(String source, String... expected_terms) throws IOException {
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisUkrainianPlugin());
Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();
TokenStream ts = analyzer.tokenStream("test", source);
CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
ts.reset();
for (String expected : expected_terms) {
assertThat(ts.incrementToken(), equalTo(true));
assertThat(term1.toString(), equalTo(expected));
}
assertThat(ts.incrementToken(), equalTo(false));
}
use of org.apache.lucene.analysis.tokenattributes.CharTermAttribute in project elasticsearch by elastic.
the class KuromojiAnalysisTests method assertSimpleTSOutput.
public static void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
stream.reset();
CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
assertThat(termAttr, notNullValue());
int i = 0;
while (stream.incrementToken()) {
assertThat(expected.length, greaterThan(i));
assertThat("expected different term at index " + i, expected[i++], equalTo(termAttr.toString()));
}
assertThat("not all tokens produced", i, equalTo(expected.length));
}
Aggregations