use of org.apache.lucene.queries.CustomScoreQuery in project lucene-solr by apache.
the class SynonymTokenizer method testCustomScoreQueryHighlight.
public void testCustomScoreQueryHighlight() throws Exception {
TermQuery termQuery = new TermQuery(new Term(FIELD_NAME, "very"));
CustomScoreQuery query = new CustomScoreQuery(termQuery);
searcher = newSearcher(reader);
TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
assertEquals(2, hits.totalHits);
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(scorer);
final int docId0 = hits.scoreDocs[0].doc;
Document doc = searcher.doc(docId0);
String storedField = doc.get(FIELD_NAME);
TokenStream stream = getAnyTokenStream(FIELD_NAME, docId0);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
highlighter.setTextFragmenter(fragmenter);
String fragment = highlighter.getBestFragment(stream, storedField);
assertEquals("Hello this is a piece of text that is <B>very</B> long and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
}
use of org.apache.lucene.queries.CustomScoreQuery in project lucene-solr by apache.
the class FastVectorHighlighterTest method testCustomScoreQueryHighlight.
public void testCustomScoreQueryHighlight() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType type = new FieldType(TextField.TYPE_STORED);
type.setStoreTermVectorOffsets(true);
type.setStoreTermVectorPositions(true);
type.setStoreTermVectors(true);
type.freeze();
Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
doc.add(field);
writer.addDocument(doc);
FastVectorHighlighter highlighter = new FastVectorHighlighter();
IndexReader reader = DirectoryReader.open(writer);
int docId = 0;
FieldQuery fieldQuery = highlighter.getFieldQuery(new CustomScoreQuery(new TermQuery(new Term("field", "foo"))), reader);
String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
// highlighted results are centered
assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
reader.close();
writer.close();
dir.close();
}
use of org.apache.lucene.queries.CustomScoreQuery in project lucene-solr by apache.
the class WeightedSpanTermExtractor method extract.
/**
* Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
*
* @param query
* Query to extract Terms from
* @param terms
* Map to place created WeightedSpanTerms in
* @throws IOException If there is a low-level I/O error
*/
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
if (query instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) query;
extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
extract(clause.getQuery(), boost, terms);
}
}
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
} else if (query instanceof TermQuery || query instanceof SynonymQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
} else if (query instanceof ConstantScoreQuery) {
final Query q = ((ConstantScoreQuery) query).getQuery();
if (q != null) {
extract(q, boost, terms);
}
} else if (query instanceof CommonTermsQuery) {
// specialized since rewriting would change the result query
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof ToChildBlockJoinQuery) {
extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.length; ++i) {
final Term[] termArray = termArrays[i];
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing
} else if (query instanceof CustomScoreQuery) {
extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {
if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
return;
}
Query origQuery = query;
final IndexReader reader = getLeafContext().reader();
Query rewritten;
if (query instanceof MultiTermQuery) {
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = origQuery.rewrite(reader);
}
if (rewritten != origQuery) {
// only rewrite once and then flatten again - the rewritten query could have a special treatment
// if this method is overwritten in a subclass or above in the next recursion
extract(rewritten, boost, terms);
} else {
extractUnknownQuery(query, terms);
}
}
}
use of org.apache.lucene.queries.CustomScoreQuery in project lucene-solr by apache.
the class FieldQuery method flatten.
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
while (sourceQuery instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) sourceQuery;
sourceQuery = bq.getQuery();
boost *= bq.getBoost();
}
if (sourceQuery instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) sourceQuery;
for (BooleanClause clause : bq) {
if (!clause.isProhibited()) {
flatten(clause.getQuery(), reader, flatQueries, boost);
}
}
} else if (sourceQuery instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
for (Query query : dmq) {
flatten(query, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof TermQuery) {
if (boost != 1f) {
sourceQuery = new BoostQuery(sourceQuery, boost);
}
if (!flatQueries.contains(sourceQuery))
flatQueries.add(sourceQuery);
} else if (sourceQuery instanceof SynonymQuery) {
SynonymQuery synQuery = (SynonymQuery) sourceQuery;
for (Term term : synQuery.getTerms()) {
flatten(new TermQuery(term), reader, flatQueries, boost);
}
} else if (sourceQuery instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) sourceQuery;
if (pq.getTerms().length == 1)
sourceQuery = new TermQuery(pq.getTerms()[0]);
if (boost != 1f) {
sourceQuery = new BoostQuery(sourceQuery, boost);
}
flatQueries.add(sourceQuery);
} else if (sourceQuery instanceof ConstantScoreQuery) {
final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
if (q != null) {
flatten(q, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof CustomScoreQuery) {
final Query q = ((CustomScoreQuery) sourceQuery).getSubQuery();
if (q != null) {
flatten(q, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof ToParentBlockJoinQuery) {
Query childQuery = ((ToParentBlockJoinQuery) sourceQuery).getChildQuery();
if (childQuery != null) {
flatten(childQuery, reader, flatQueries, boost);
}
} else if (reader != null) {
Query query = sourceQuery;
Query rewritten;
if (sourceQuery instanceof MultiTermQuery) {
rewritten = new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS).rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = query.rewrite(reader);
}
if (rewritten != query) {
// only rewrite once and then flatten again - the rewritten query could have a speacial treatment
// if this method is overwritten in a subclass.
flatten(rewritten, reader, flatQueries, boost);
}
// if the query is already rewritten we discard it
}
// else discard queries
}
use of org.apache.lucene.queries.CustomScoreQuery in project ddf by codice.
the class GeoNamesQueryLuceneIndex method createQuery.
protected Query createQuery(final String queryString) throws ParseException {
final StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
final QueryParser nameQueryParser = new QueryParser(GeoNamesLuceneConstants.NAME_FIELD, standardAnalyzer);
nameQueryParser.setEnablePositionIncrements(false);
/* For the name, we construct a query searching for exactly the query string (the phrase
query), a query searching for all the terms in the query string (the AND query), and a
query searching for any of the terms in the query string (the OR query). We take the
maximum of the scores generated by these three queries and use that as the score for the
name. */
// Surround with quotes so Lucene looks for the words in the query as a phrase.
// Phrase query gets the biggest boost - 3.2 was obtained after some experimentation.
final Query phraseNameQuery = new BoostQuery(nameQueryParser.parse("\"" + queryString + "\""), 3.2f);
// By default, QueryParser uses OR to separate terms.
// We give OR queries the lowest boost because they're not as good as phrase matches or
// AND matches - 1 (the default boost value) was obtained after some experimentation.
final Query orNameQuery = nameQueryParser.parse(queryString);
nameQueryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
// We give AND queries the second-biggest boost because they're better than OR matches but
// not as good as phrase matches - 2 was obtained after some experimentation.
final Query andNameQuery = new BoostQuery(nameQueryParser.parse(queryString), 2f);
final List<Query> nameQueryList = Arrays.asList(phraseNameQuery, orNameQuery, andNameQuery);
// This query will score each document by the maximum of the three sub-queries.
final Query nameQuery = new DisjunctionMaxQuery(nameQueryList, 0);
final QueryParser alternateNamesQueryParser = new QueryParser(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, standardAnalyzer);
// For the alternate names, we perform an AND query and an OR query, both of which are
// boosted less than the name query because the alternate names are generally not as
// important.
// The OR query gets a lower boost - 0.5 was obtained after some experimentation.
final Query orAlternateNamesQuery = new BoostQuery(alternateNamesQueryParser.parse(queryString), 0.5f);
alternateNamesQueryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
// The AND query gets a higher boost - 1 (the default boost value) was obtained after some
// experimentation.
final Query andAlternateNamesQuery = alternateNamesQueryParser.parse(queryString);
final List<Query> alternateNamesQueryList = Arrays.asList(orAlternateNamesQuery, andAlternateNamesQuery);
// This query will score each document by the maximum of the two sub-queries.
final Query alternateNamesQuery = new DisjunctionMaxQuery(alternateNamesQueryList, 0);
final List<Query> queryList = Arrays.asList(nameQuery, alternateNamesQuery);
// This query will score each document by the sum of the two sub-queries, since both the
// name and the alternate names are important.
// The boost values ensure that how well the query matches the name has a bigger impact on
// the final score than how well it matches the alternate names.
final DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(queryList, 1.0f);
// This is the boost we calculated at index time, and it is applied in the CustomScoreQuery.
final FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource(GeoNamesLuceneConstants.BOOST_FIELD));
return new CustomScoreQuery(disjunctionMaxQuery, boostQuery);
}
Aggregations