Use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
The class AbstractTestCase, method analyze.
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<>();
  try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      // the attribute's BytesRef is reused across tokens, so take a deep copy
      bytesRefs.add(BytesRef.deepCopyOf(termAttribute.getBytesRef()));
    }
    tokenStream.end();
  }
  return bytesRefs;
}
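For illustration, a minimal sketch of how the analyze helper above could be called; the choice of StandardAnalyzer and the field/text values are assumptions, not part of the original test.

// Hypothetical caller for the analyze(...) helper above; assumes
// org.apache.lucene.analysis.standard.StandardAnalyzer is on the classpath.
try (Analyzer analyzer = new StandardAnalyzer()) {
  List<BytesRef> tokens = analyze("Hello World", "body", analyzer);
  for (BytesRef token : tokens) {
    System.out.println(token.utf8ToString()); // prints "hello" then "world"
  }
}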
Use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
The class TestPerfTasksLogic, method assertEqualCollation.
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) throws Exception {
  TokenStream ts1 = a1.tokenStream("bogus", text);
  TokenStream ts2 = a2.tokenStream("bogus", text);
  ts1.reset();
  ts2.reset();
  TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
  TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
  // both analyzers must produce exactly one token with identical bytes
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  BytesRef bytes1 = termAtt1.getBytesRef();
  BytesRef bytes2 = termAtt2.getBytesRef();
  assertEquals(bytes1, bytes2);
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  // finish the streams per the TokenStream contract before closing
  ts1.end();
  ts2.end();
  ts1.close();
  ts2.close();
}
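As a sketch of how this assertion might be driven: two CollationKeyAnalyzer instances built from equivalent collators should collate identically. The locale and sample text below are assumptions for illustration, not taken from the original test.

// Sketch only; assumes org.apache.lucene.collation.CollationKeyAnalyzer
// and java.text.Collator. Both analyzers wrap equivalent collators.
Collator collator1 = Collator.getInstance(Locale.ENGLISH);
Collator collator2 = Collator.getInstance(Locale.ENGLISH);
assertEqualCollation(new CollationKeyAnalyzer(collator1),
    new CollationKeyAnalyzer(collator2), "foobar");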
Use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
The class CollationField, method getCollationKey.
/**
 * Analyze the range with the analyzer, instead of the collator.
 * Because JDK collators might not be thread safe (even when they are,
 * it's only because all methods are synchronized), this keeps things
 * simple (we already have a thread-local clone in the reused TokenStream).
 */
private BytesRef getCollationKey(String field, String text) {
  try (TokenStream source = analyzer.tokenStream(field, text)) {
    source.reset();
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    // we control the analyzer here: most errors are impossible
    if (!source.incrementToken()) {
      throw new IllegalArgumentException("analyzer returned no terms for text: " + text);
    }
    BytesRef bytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
    assert !source.incrementToken();
    source.end();
    return bytes;
  } catch (IOException e) {
    throw new RuntimeException("Unable to analyze text: " + text, e);
  }
}
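One plausible use of these keys is building a collated range query, sketched below; TermRangeQuery is real Lucene API, but the field name and bounds are hypothetical examples.

// Hypothetical range-query construction from collation keys.
BytesRef low = getCollationKey("name_sort", "apple");
BytesRef high = getCollationKey("name_sort", "banana");
Query rangeQuery = new TermRangeQuery("name_sort", low, high, true, true);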
Use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project janusgraph by JanusGraph.
The class SolrIndex, method customTokenize.
@SuppressWarnings("unchecked")
private List<String> customTokenize(String tokenizerClass, String value) {
  CachingTokenFilter stream = null;
  try {
    final List<String> terms = new ArrayList<>();
    // instantiate the tokenizer reflectively via its no-arg constructor
    final Tokenizer tokenizer = ((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader()
        .loadClass(tokenizerClass).getConstructor()).newInstance();
    tokenizer.setReader(new StringReader(value));
    stream = new CachingTokenFilter(tokenizer);
    final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      terms.add(termAtt.getBytesRef().utf8ToString());
    }
    return terms;
  } catch (ReflectiveOperationException | IOException e) {
    throw new IllegalArgumentException(e.getMessage(), e);
  } finally {
    IOUtils.closeQuietly(stream);
  }
}
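A hypothetical call, assuming Lucene's WhitespaceTokenizer (which has a public no-arg constructor) is on the classpath; the input string is made up for illustration.

// Hypothetical usage; the tokenizer is loaded reflectively by class name.
List<String> terms = customTokenize(
    "org.apache.lucene.analysis.core.WhitespaceTokenizer", "hello world foo");
// terms == [hello, world, foo]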