Example use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in the elasticsearch project (by elastic): class PercolateQueryTests, method testPercolateQuery.
/**
 * Indexes four percolator "query documents" (each tagged via a {@code select}
 * field) and verifies that {@link PercolateQuery} matches the right subset of
 * them against an in-memory document, both with and without scoring.
 */
public void testPercolateQuery() throws Exception {
    List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
    List<Query> queries = new ArrayList<>();
    // The query store resolves a candidate doc id to the query stored at the
    // same position in the in-memory list.
    PercolateQuery.QueryStore queryStore = ctx -> queries::get;

    // doc 0 (select=a): simple term query
    queries.add(new TermQuery(new Term("field", "fox")));
    docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));

    // doc 1 (select=b): ordered span-near query with slop 2
    SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
    snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
    snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
    snp.addClause(new SpanTermQuery(new Term("field", "dog")));
    snp.setSlop(2);
    queries.add(snp.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));

    // doc 2 (select=b): sloppy phrase query
    PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
    pq1.add(new Term("field", "quick"));
    pq1.add(new Term("field", "brown"));
    pq1.add(new Term("field", "jumps"));
    pq1.setSlop(1);
    queries.add(pq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));

    // doc 3 (select=b): conjunction of three term queries
    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
    bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
    bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
    queries.add(bq1.build());
    docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));

    indexWriter.addDocuments(docs);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);

    // The document being percolated; all stored queries above match it.
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();

    // no scoring, wrapping it in a constant score query:
    Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"),
        new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
    TopDocs topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(1));
    assertThat(topDocs.scoreDocs.length, equalTo(1));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
    Explanation explanation = shardSearcher.explain(query, 0);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));

    // Restricting to select=b must match the three remaining query docs, and
    // each hit's explanation must agree with its reported score.
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"),
        new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    // BUGFIX: was explain(query, 2) — must explain the doc id asserted on the
    // previous line (doc 3), matching the pattern of the other hits.
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));

    // A match-all candidate query percolates against every stored query doc.
    query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"),
        new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(4));

    // With scoring (no ConstantScoreQuery wrapper) the order of the select=b
    // hits flips, and each explanation gains a nested detail.
    query = new PercolateQuery("type", queryStore, new BytesArray("{}"),
        new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""));
    topDocs = shardSearcher.search(query, 10);
    assertThat(topDocs.totalHits, equalTo(3));
    assertThat(topDocs.scoreDocs.length, equalTo(3));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
    explanation = shardSearcher.explain(query, 3);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
    explanation = shardSearcher.explain(query, 2);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
    explanation = shardSearcher.explain(query, 1);
    assertThat(explanation.isMatch(), is(true));
    assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
    assertThat(explanation.getDetails(), arrayWithSize(1));
}
Example use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in the elasticsearch project (by elastic): class PercolatorFieldMapperTests, method testCreateCandidateQuery.
/**
 * Verifies that the percolator field type builds a candidate query of exactly
 * two SHOULD clauses: one TermInSetQuery over the terms extracted from the
 * in-memory document, and one catch-all clause for queries whose term
 * extraction failed.
 */
public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();

    // In-memory document: three analyzed text fields, one numeric-looking
    // text field, and a point field (which contributes no indexed terms).
    MemoryIndex memoryIndex = new MemoryIndex(false);
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
    memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();

    BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
    assertEquals(2, candidateQuery.clauses().size());

    // Clause 0: the set of extracted "<field> <term>" strings, sorted.
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
    TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();
    PrefixCodedTerms terms = termsQuery.getTermData();
    assertThat(terms.size(), equalTo(14L));
    String[] expectedTerms = {
        "_field3 me", "_field3 unhide",
        "field1 brown", "field1 dog", "field1 fox", "field1 jumps",
        "field1 lazy", "field1 over", "field1 quick", "field1 the",
        "field2 more", "field2 some", "field2 text",
        "field4 123"
    };
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    for (String expectedTerm : expectedTerms) {
        assertTermIterator(termIterator, expectedTerm, fieldType.queryTermsField.name());
    }

    // Clause 1: catch-all for percolator queries where extraction failed.
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
    assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)),
        candidateQuery.clauses().get(1).getQuery());
}
Example use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in the lucene-solr project (by apache): class IndexBasedSpellCheckerTest, method testAlternateLocation.
@Test
public void testAlternateLocation() throws Exception {
    // Titles for a standalone index built at an alternate location,
    // separate from the spellchecker's own index directory.
    String[] alternateTitles = new String[] {
        "jumpin jack flash",
        "Sargent Peppers Lonely Hearts Club Band",
        "Born to Run",
        "Thunder Road",
        "Londons Burning",
        "A Horse with No Name",
        "Sweet Caroline"
    };

    IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

    File tmpDir = createTempDir().toFile();
    File indexDir = new File(tmpDir, "spellingIdx");
    //create a standalone index
    File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
    Directory altDirectory = newFSDirectory(altIndexDir.toPath());
    IndexWriter writer = new IndexWriter(altDirectory, new IndexWriterConfig(new WhitespaceAnalyzer()));
    for (String title : alternateTitles) {
        Document document = new Document();
        document.add(new TextField("title", title, Field.Store.YES));
        writer.addDocument(document);
    }
    writer.forceMerge(1);
    writer.close();
    altDirectory.close();

    // Point the spellchecker at its own (empty) index dir but tell it to
    // source terms from the alternate location built above.
    indexDir.mkdirs();
    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);

    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
        dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

    RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
    SolrIndexSearcher searcher = searcherRef.get();
    try {
        checker.build(core, searcher);
        IndexReader reader = searcher.getIndexReader();

        // "flesh" should produce exactly one suggestion: "flash".
        Collection<Token> tokens = queryConverter.convert("flesh");
        SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
            SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("flesh is null and it shouldn't be", suggestions != null);
        assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
        Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
        assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash"));
        assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions size should be 0", suggestions.size() == 0);

        // A correctly-spelled term that exists in the index yields no entry.
        spellOpts.tokens = queryConverter.convert("Caroline");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions is not null and it should be", suggestions == null);
    } finally {
        searcherRef.decref();
    }
}
Example use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in the lucene-solr project (by apache): class SpellingQueryConverterTest, method testMultipleClauses.
@Test
public void testMultipleClauses() {
    // A whitespace analyzer keeps each whitespace-delimited clause as one
    // token, so each query below should yield exactly two tokens.
    SpellingQueryConverter sqc = new SpellingQueryConverter();
    sqc.init(new NamedList());
    sqc.setAnalyzer(new WhitespaceAnalyzer());

    // two field:value pairs should give two tokens
    Collection<Token> converted = sqc.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertTrue("tokens is null and it shouldn't be", converted != null);
    assertEquals("tokens Size: " + converted.size() + " is not 2", 2, converted.size());

    // a field:value pair and a search term should give two tokens
    converted = sqc.convert("text_field:我购买了道具和服装。 bar");
    assertTrue("tokens is null and it shouldn't be", converted != null);
    assertEquals("tokens Size: " + converted.size() + " is not 2", 2, converted.size());
}
Example use of org.apache.lucene.analysis.core.WhitespaceAnalyzer in the lucene-solr project (by apache): class SpellingQueryConverterTest, method testNumeric.
@Test
public void testNumeric() throws Exception {
    // Verifies that purely numeric terms are still tokenized by the
    // converter, with and without field prefixes and +/- operators.
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    String[] inputs = {
        "12345", "foo:12345", "12345 67890", "foo:(12345 67890)",
        "foo:(life 67890)", "12345 life", "+12345 +life", "-12345 life"
    };
    // Expected token count for each input above, position for position.
    int[] expectedCounts = { 1, 1, 2, 2, 2, 2, 2, 2 };
    for (int i = 0; i < inputs.length; i++) {
        Collection<Token> tokens = converter.convert(inputs[i]);
        assertTrue("tokens Size: " + tokens.size() + " is not: " + expectedCounts[i],
            tokens.size() == expectedCounts[i]);
    }
}
Aggregations