use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
the class TestFreeTextSuggester method testIllegalByteDuringQuery.
/**
 * Checks that a lookup whose query text contains the suggester's separator
 * character is rejected with an {@link IllegalArgumentException}.
 *
 * @throws Exception if building the suggester or closing the analyzer fails
 */
public void testIllegalByteDuringQuery() throws Exception {
  // Default separator is INFORMATION SEPARATOR TWO
  // (0x1e), so no input token is allowed to contain it
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz", 50));
  Analyzer analyzer = new MockAnalyzer(random());
  FreeTextSuggester sug = new FreeTextSuggester(analyzer);
  sug.build(new InputArrayIterator(keys));
  expectThrows(IllegalArgumentException.class, () -> {
    // The query has to actually carry the separator; it is spelled as an
    // escape so the otherwise invisible control character cannot get lost.
    sug.lookup("foo\u001eb", 10);
  });
  analyzer.close();
}
use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
the class FuzzySuggesterTest method testRandom2.
/**
 * Randomized test: indexes a set of unique random terms in a
 * {@code FuzzySuggester} configured with random edit distance, prefix length
 * and transposition settings, then compares every lookup against the result
 * of {@code slowFuzzyMatch} for the same fragment.
 *
 * @throws Throwable if building, looking up, or closing resources fails
 */
public void testRandom2() throws Throwable {
  final int NUM = atLeast(200);
  final List<Input> answers = new ArrayList<>();
  final Set<String> seen = new HashSet<>();
  for (int i = 0; i < NUM; i++) {
    final String candidate = randomSimpleString(8);
    // Set.add reports whether the term is new, so duplicates are skipped
    // without drawing a weight for them.
    if (seen.add(candidate)) {
      answers.add(new Input(candidate, random().nextInt(1000)));
    }
  }
  answers.sort((left, right) -> left.term.compareTo(right.term));
  if (VERBOSE) {
    System.out.println("\nTEST: targets");
    for (Input target : answers) {
      System.out.println(" " + target.term.utf8ToString() + " freq=" + target.v);
    }
  }

  Analyzer a = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  int maxEdits;
  if (random().nextBoolean()) {
    maxEdits = 1;
  } else {
    maxEdits = 2;
  }
  int prefixLen = random().nextInt(4);
  boolean transpositions = random().nextBoolean();
  // TODO: test graph analyzers
  // TODO: test exactFirst / preserveSep permutations
  Directory tempDir = getDirectory();
  FuzzySuggester suggest = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 256, -1, true,
      maxEdits, transpositions, prefixLen, prefixLen, false);
  if (VERBOSE) {
    System.out.println("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
  }

  Collections.shuffle(answers, random());
  Input[] shuffled = answers.toArray(new Input[answers.size()]);
  suggest.build(new InputArrayIterator(shuffled));

  final int ITERS = atLeast(100);
  for (int round = 0; round < ITERS; round++) {
    final String frag = randomSimpleString(6);
    if (VERBOSE) {
      System.out.println("\nTEST: iter frag=" + frag);
    }

    // Reference answer computed by the slow matcher.
    final List<LookupResult> expected = slowFuzzyMatch(prefixLen, maxEdits, transpositions, answers, frag);
    if (VERBOSE) {
      System.out.println(" expected: " + expected.size());
      for (LookupResult hit : expected) {
        System.out.println(" " + hit);
      }
    }

    final List<LookupResult> actual = suggest.lookup(frag, false, NUM);
    if (VERBOSE) {
      System.out.println(" actual: " + actual.size());
      for (LookupResult hit : actual) {
        System.out.println(" " + hit);
      }
    }

    Collections.sort(actual, new CompareByCostThenAlpha());

    // Compare the overlapping prefix element by element first so a mismatch
    // reports the offending key, then require the sizes to agree.
    final int common = Math.min(expected.size(), actual.size());
    for (int idx = 0; idx < common; idx++) {
      final LookupResult want = expected.get(idx);
      final LookupResult got = actual.get(idx);
      assertEquals("expected " + want.key + " but got " + got.key, 0, CHARSEQUENCE_COMPARATOR.compare(want.key, got.key));
      assertEquals(want.value, got.value);
    }
    assertEquals(expected.size(), actual.size());
  }
  IOUtils.close(a, tempDir);
}
use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
the class FuzzySuggesterTest method testEditSeps.
/**
 * Looks up queries that differ from the indexed keys only by the presence or
 * absence of spaces and asserts the exact suggestions returned by a
 * {@code FuzzySuggester} built with {@code PRESERVE_SEP}.
 *
 * @throws Exception if building, looking up, or closing resources fails
 */
public void testEditSeps() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory dir = getDirectory();
  FuzzySuggester fuzzy = new FuzzySuggester(dir, "fuzzy", analyzer, analyzer,
      FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);

  List<Input> inputs = Arrays.asList(
      new Input("foo bar", 40),
      new Input("foo bar baz", 50),
      new Input("barbaz", 60),
      new Input("barbazfoo", 10));
  Collections.shuffle(inputs, random());
  fuzzy.build(new InputArrayIterator(inputs));

  // Query is missing the space(s) present in the indexed keys.
  assertEquals("[foo bar baz/50, foo bar/40]", fuzzy.lookup("foobar", false, 5).toString());
  assertEquals("[foo bar baz/50]", fuzzy.lookup("foobarbaz", false, 5).toString());
  // Query has extra space(s) the indexed keys do not have.
  assertEquals("[barbaz/60, barbazfoo/10]", fuzzy.lookup("bar baz", false, 5).toString());
  assertEquals("[barbazfoo/10]", fuzzy.lookup("bar baz foo", false, 5).toString());

  IOUtils.close(analyzer, dir);
}
use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testHighlight.
/**
 * Builds an {@code AnalyzingInfixSuggester} over a single key and asserts
 * that a lookup for "penn" returns that key with every occurrence of the
 * matched prefix wrapped in {@code <b>} tags.
 *
 * @throws Exception if building, looking up, or closing resources fails
 */
public void testHighlight() throws Exception {
  Input[] inputs = {
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"))
  };
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester infix = new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, false);
  infix.build(new InputArrayIterator(inputs));

  CharSequence query = TestUtil.stringToCharSequence("penn", random());
  List<LookupResult> hits = infix.lookup(query, 10, true, true);
  assertEquals(1, hits.size());

  LookupResult only = hits.get(0);
  assertEquals("a penny saved is a penny earned", only.key);
  assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", only.highlightKey);

  infix.close();
  analyzer.close();
}
use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testContextNotAllTermsRequired.
/**
 * Exercises context-filtered lookups on an {@code AnalyzingInfixSuggester}
 * with {@code allTermsRequired=false}: first with no context, then with a
 * context shared by both inputs, and finally with a query that matches only
 * one of them. Each result is checked for key, highlight, weight, payload and
 * contexts.
 *
 * @throws Exception if building, looking up, or closing resources fails
 */
public void testContextNotAllTermsRequired() throws Exception {
  Input[] keys = new Input[] {
    new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz"))
  };
  Path tempDir = createTempDir("analyzingInfixContext");
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
  suggester.build(new InputArrayIterator(keys));

  // No context provided, all results returned
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, false, true);
  assertEquals(2, results.size());
  LookupResult result = results.get(0);
  assertEquals("a penny saved is a penny earned", result.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
  assertEquals(10, result.value);
  assertEquals(new BytesRef("foobaz"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("baz")));
  result = results.get(1);
  assertEquals("lend me your ear", result.key);
  assertEquals("lend me your <b>ear</b>", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  // Both have "foo" context:
  results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, false, true);
  assertEquals(2, results.size());
  result = results.get(0);
  assertEquals("a penny saved is a penny earned", result.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
  assertEquals(10, result.value);
  assertEquals(new BytesRef("foobaz"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("baz")));
  result = results.get(1);
  assertEquals("lend me your ear", result.key);
  assertEquals("lend me your <b>ear</b>", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  // Only one has "foo" context and len
  results = suggester.lookup(TestUtil.stringToCharSequence("len", random()), asSet("foo"), 10, false, true);
  assertEquals(1, results.size());
  result = results.get(0);
  assertEquals("lend me your ear", result.key);
  assertEquals("<b>len</b>d me your ear", result.highlightKey);
  assertEquals(8, result.value);
  assertEquals(new BytesRef("foobar"), result.payload);
  assertNotNull(result.contexts);
  assertEquals(2, result.contexts.size());
  assertTrue(result.contexts.contains(new BytesRef("foo")));
  assertTrue(result.contexts.contains(new BytesRef("bar")));

  suggester.close();
  // The analyzer was created by this test, so release it explicitly as well.
  a.close();
}
Aggregations