use of org.apache.lucene.analysis.TokenStreamToAutomaton in project elasticsearch by elastic.
the class XFuzzySuggester method getTokenStreamToAutomaton.
@Override
public TokenStreamToAutomaton getTokenStreamToAutomaton() {
final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton();
tsta.setUnicodeArcs(unicodeAware);
return tsta;
}
use of org.apache.lucene.analysis.TokenStreamToAutomaton in project lucene-solr by apache.
the class FuzzySuggesterTest method testRandom.
public void testRandom() throws Exception {
int numQueries = atLeast(100);
final List<TermFreqPayload2> slowCompletor = new ArrayList<>();
final TreeSet<String> allPrefixes = new TreeSet<>();
final Set<String> seen = new HashSet<>();
Input[] keys = new Input[numQueries];
boolean preserveSep = random().nextBoolean();
boolean unicodeAware = random().nextBoolean();
final int numStopChars = random().nextInt(10);
final boolean preserveHoles = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: " + numQueries + " words; preserveSep=" + preserveSep + " ; unicodeAware=" + unicodeAware + " numStopChars=" + numStopChars + " preserveHoles=" + preserveHoles);
}
for (int i = 0; i < numQueries; i++) {
int numTokens = TestUtil.nextInt(random(), 1, 4);
String key;
String analyzedKey;
while (true) {
key = "";
analyzedKey = "";
boolean lastRemoved = false;
for (int token = 0; token < numTokens; token++) {
String s;
while (true) {
// TODO: would be nice to fix this slowCompletor/comparator to
// use full range, but we might lose some coverage too...
s = TestUtil.randomSimpleString(random());
if (s.length() > 0) {
if (token > 0) {
key += " ";
}
if (preserveSep && analyzedKey.length() > 0 && (unicodeAware ? analyzedKey.codePointAt(analyzedKey.codePointCount(0, analyzedKey.length()) - 1) != ' ' : analyzedKey.charAt(analyzedKey.length() - 1) != ' ')) {
analyzedKey += " ";
}
key += s;
if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
if (preserveSep && preserveHoles) {
analyzedKey += '