use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.
the class KNearestNeighborClassifier method knnSearch.
/**
 * Finds the {@code k} indexed documents most similar to the given text.
 *
 * <p>One "more like this" clause is built per entry of {@code textFieldNames} and added as an
 * optional (SHOULD) clause. An entry may carry a boost suffix in the form {@code field^boost};
 * the boost is then applied as the MLT boost factor for that field only. Matching documents must
 * have some value in {@code classFieldName} and, when the optional {@code query} is set, must
 * also match it.
 *
 * @param text the text to find neighbours for
 * @return the top {@code k} hits of the combined query
 * @throws IOException if building the MLT query or searching the index fails
 * @throws NumberFormatException if a boost suffix is not a parsable float
 */
private TopDocs knnSearch(String text) throws IOException {
  BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
  for (String fieldSpec : textFieldNames) {
    // terms boost actually helps in MLT queries
    mlt.setBoost(true);

    // split an optional "field^boost" spec into its two parts
    String fieldName = fieldSpec;
    String boost = null;
    if (fieldSpec.contains("^")) {
      String[] field2boost = fieldSpec.split("\\^");
      fieldName = field2boost[0];
      boost = field2boost[1];
    }

    if (boost != null) {
      // if we have a field boost, we add it
      mlt.setBoostFactor(Float.parseFloat(boost));
    }
    try {
      mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
    } finally {
      // restore neutral boost for the next field; done in finally because mlt is shared state
      // and a failure in like() must not leave a field-specific boost behind
      mlt.setBoostFactor(1);
    }
  }

  // only documents that actually carry a class value are useful neighbours
  Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
  mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
  if (query != null) {
    mltQuery.add(query, BooleanClause.Occur.MUST);
  }
  return indexSearcher.search(mltQuery.build(), k);
}
use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.
the class KNearestFuzzyClassifier method knnSearch.
/**
 * Finds the {@code k} indexed documents whose text fields fuzzily resemble the given text.
 *
 * <p>The terms of {@code text} are added to a single {@link FuzzyLikeThisQuery} once per entry of
 * {@code textFieldNames}. Matching documents must satisfy that fuzzy query, must have some value
 * in {@code classFieldName} and, when the optional {@code query} is set, must also match it.
 *
 * @param text the text to find neighbours for
 * @return the top {@code k} hits of the combined query
 * @throws IOException if searching the index fails
 */
private TopDocs knnSearch(String text) throws IOException {
  // TODO: make these hard-coded parameters configurable
  FuzzyLikeThisQuery similarTerms = new FuzzyLikeThisQuery(300, analyzer);
  for (String textField : textFieldNames) {
    similarTerms.addTerms(text, textField, 1f, 2);
  }

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(similarTerms, BooleanClause.Occur.MUST);
  // restrict to documents that have any value in the class field
  builder.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
  if (query != null) {
    builder.add(query, BooleanClause.Occur.MUST);
  }

  return indexSearcher.search(builder.build(), k);
}
use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.
the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInSpanFirst.
/**
 * Checks that a {@link SpanFirstQuery} wrapping a multi-term query that matches no terms
 * (fuzzy, wildcard, regexp and prefix variants) runs and yields zero hits.
 */
public void testNoSuchMultiTermsInSpanFirst() throws Exception {
  // fuzzy: this hasn't been a problem
  FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
  SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(fuzzy);
  SpanQuery fuzzyFirst = new SpanFirstQuery(fuzzySpan, 10);
  assertEquals(0, searcher.search(fuzzyFirst, 10).totalHits);

  // wildcard
  WildcardQuery wildcard = new WildcardQuery(new Term("field", "noSuch*"));
  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(wildcard);
  SpanQuery wildcardFirst = new SpanFirstQuery(wildcardSpan, 10);
  assertEquals(0, searcher.search(wildcardFirst, 10).totalHits);

  // regexp
  RegexpQuery regexp = new RegexpQuery(new Term("field", "noSuch"));
  SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(regexp);
  SpanQuery regexpFirst = new SpanFirstQuery(regexpSpan, 10);
  assertEquals(0, searcher.search(regexpFirst, 10).totalHits);

  // prefix
  PrefixQuery prefix = new PrefixQuery(new Term("field", "noSuch"));
  SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(prefix);
  SpanQuery prefixFirst = new SpanFirstQuery(prefixSpan, 10);
  assertEquals(0, searcher.search(prefixFirst, 10).totalHits);
}
use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.
the class HighlighterTest method testConstantScoreMultiTermQuery.
/**
 * Highlights the hits of a constant-score-rewritten wildcard query ({@code ken*}) three times,
 * each with a differently configured {@link QueryScorer}, and expects five highlights per pass:
 * <ol>
 *   <li>scorer bound to {@code FIELD_NAME},</li>
 *   <li>scorer with a {@code null} field,</li>
 *   <li>scorer bound to an unrelated field with {@code FIELD_NAME} as the default field.</li>
 * </ol>
 *
 * <p>The loops are bounded by {@code hits.scoreDocs.length} rather than {@code hits.totalHits}:
 * the search is capped at 1000 results, so the total hit count may exceed the number of
 * returned score docs.
 */
public void testConstantScoreMultiTermQuery() throws Exception {
  numHighlights = 0;
  query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
  ((WildcardQuery) query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
  searcher = newSearcher(reader);
  // note: the query is passed to the scorer as-is, without an explicit rewrite
  if (VERBOSE) {
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
  }

  // pass 1: scorer bound to the searched field
  hits = searcher.search(query, 1000);
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    int maxNumFragmentsRequired = 2;
    String fragmentSeparator = "...";
    QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(20));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
    if (VERBOSE) {
      System.out.println("\t" + result);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);

  // pass 2: try null field
  hits = searcher.search(query, 1000);
  numHighlights = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    int maxNumFragmentsRequired = 2;
    String fragmentSeparator = "...";
    QueryScorer scorer = new QueryScorer(query, null);
    Highlighter highlighter = new Highlighter(this, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(20));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
    if (VERBOSE) {
      System.out.println("\t" + result);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);

  // pass 3: try default field
  hits = searcher.search(query, 1000);
  numHighlights = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
    int maxNumFragmentsRequired = 2;
    String fragmentSeparator = "...";
    QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(20));
    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
    if (VERBOSE) {
      System.out.println("\t" + result);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
}
use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.
the class HighlighterTest method testUnRewrittenQuery.
/**
 * Verifies that highlighting with a multi-term query that has NOT been rewritten produces no
 * highlights: two wildcard clauses are searched and then handed to the highlighter unrewritten,
 * and {@code numHighlights} is expected to stay at zero.
 *
 * <p>The boolean query is built once and reused for both the search and every highlighter, and
 * the hit loop is bounded by {@code hits.scoreDocs.length} because the search is capped at 1000
 * results while {@code totalHits} is not.
 */
public void testUnRewrittenQuery() throws Exception {
  final TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      numHighlights = 0;
      searcher = newSearcher(reader);

      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
      builder.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
      // deliberately left unrewritten: that is the situation under test
      final BooleanQuery unrewritten = builder.build();

      if (VERBOSE) {
        System.out.println("Searching with primitive query");
      }
      TopDocs hits = searcher.search(unrewritten, 1000);

      int maxNumFragmentsRequired = 3;
      for (int i = 0; i < hits.scoreDocs.length; i++) {
        final int docId = hits.scoreDocs[i].doc;
        final Document doc = searcher.doc(docId);
        String text = doc.get(FIELD_NAME);
        TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
        Highlighter highlighter = getHighlighter(unrewritten, FIELD_NAME, HighlighterTest.this, false);
        highlighter.setTextFragmenter(new SimpleFragmenter(40));
        String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE) {
          System.out.println(highlightedText);
        }
      }

      // We expect to have zero highlights if the query is multi-terms and is not rewritten!
      assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
    }
  };
  helper.start();
}
Aggregations