use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.
the class TestMoreLikeThis method testTopN.
public void testTopN() throws Exception {
int numDocs = 100;
int topN = 25;
// add series of docs with terms of decreasing df
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numDocs; i++) {
addDoc(writer, generateStrSeq(0, i + 1));
}
IndexReader reader = writer.getReader();
writer.close();
// setup MLT query
MoreLikeThis mlt = new MoreLikeThis(reader);
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
mlt.setAnalyzer(analyzer);
mlt.setMaxQueryTerms(topN);
mlt.setMinDocFreq(1);
mlt.setMinTermFreq(1);
mlt.setMinWordLen(1);
mlt.setFieldNames(new String[] { "text" });
// perform MLT query
String likeText = "";
for (String text : generateStrSeq(0, numDocs)) {
likeText += text + " ";
}
BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));
// check best terms are topN of highest idf
Collection<BooleanClause> clauses = query.clauses();
assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());
Term[] expectedTerms = new Term[topN];
int idx = 0;
for (String text : generateStrSeq(numDocs - topN, topN)) {
expectedTerms[idx++] = new Term("text", text);
}
for (BooleanClause clause : clauses) {
Term term = ((TermQuery) clause.getQuery()).getTerm();
assertTrue(Arrays.asList(expectedTerms).contains(term));
}
// clean up
reader.close();
dir.close();
analyzer.close();
}
use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.
the class CloudMLTQParser method parse.
public Query parse() {
String id = localParams.get(QueryParsing.V);
// Do a Real Time Get for the document
SolrDocument doc = getDocument(id);
if (doc == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
}
String[] qf = localParams.getParams("qf");
Map<String, Float> boostFields = new HashMap<>();
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
mlt.setMinDocFreq(localParams.getInt("mindf", 0));
mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
mlt.setBoost(boost);
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
Map<String, Collection<Object>> filteredDocument = new HashMap<>();
String[] fieldNames;
if (qf != null) {
ArrayList<String> fields = new ArrayList();
for (String fieldName : qf) {
if (!StringUtils.isEmpty(fieldName)) {
String[] strings = splitList.split(fieldName);
for (String string : strings) {
if (!StringUtils.isEmpty(string)) {
fields.add(string);
}
}
}
}
// Parse field names and boosts from the fields
boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
fieldNames = boostFields.keySet().toArray(new String[0]);
} else {
ArrayList<String> fields = new ArrayList();
for (String field : doc.getFieldNames()) {
// Only use fields that are stored and have an explicit analyzer.
// This makes sense as the query uses tf/idf/.. for query construction.
// We might want to relook and change this in the future though.
SchemaField f = req.getSchema().getFieldOrNull(field);
if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
fields.add(field);
}
}
fieldNames = fields.toArray(new String[0]);
}
if (fieldNames.length < 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
}
mlt.setFieldNames(fieldNames);
for (String field : fieldNames) {
Collection<Object> fieldValues = doc.getFieldValues(field);
if (fieldValues != null) {
Collection<Object> values = new ArrayList();
for (Object val : fieldValues) {
if (val instanceof IndexableField) {
values.add(((IndexableField) val).stringValue());
} else {
values.add(val);
}
}
filteredDocument.put(field, values);
}
}
try {
Query rawMLTQuery = mlt.like(filteredDocument);
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
if (boost && boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
for (BooleanClause clause : boostedMLTQuery) {
Query q = clause.getQuery();
float originalBoost = 1f;
if (q instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) q;
q = bq.getQuery();
originalBoost = bq.getBoost();
}
Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
newQ.add(q, clause.getOccur());
}
boostedMLTQuery = newQ.build();
}
// exclude current document from results
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
return realMLTQuery.build();
} catch (IOException e) {
e.printStackTrace();
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request");
}
}
use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.
the class SimpleMLTQParser method parse.
public Query parse() {
String defaultField = req.getSchema().getUniqueKeyField().getName();
String uniqueValue = localParams.get(QueryParsing.V);
String[] qf = localParams.getParams("qf");
SolrIndexSearcher searcher = req.getSearcher();
Query docIdQuery = createIdQuery(defaultField, uniqueValue);
Map<String, Float> boostFields = new HashMap<>();
try {
TopDocs td = searcher.search(docIdQuery, 1);
if (td.totalHits != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]");
ScoreDoc[] scoreDocs = td.scoreDocs;
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
Boolean boost = localParams.getBool("boost", false);
mlt.setBoost(boost);
String[] fieldNames;
if (qf != null) {
ArrayList<String> fields = new ArrayList<>();
for (String fieldName : qf) {
if (!StringUtils.isEmpty(fieldName)) {
String[] strings = splitList.split(fieldName);
for (String string : strings) {
if (!StringUtils.isEmpty(string)) {
fields.add(string);
}
}
}
}
// Parse field names and boosts from the fields
boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
fieldNames = boostFields.keySet().toArray(new String[0]);
} else {
Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
ArrayList<String> fields = new ArrayList();
for (String fieldName : fieldDefinitions.keySet()) {
if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
if (fieldDefinitions.get(fieldName).getType().getNumberType() == null)
fields.add(fieldName);
}
fieldNames = fields.toArray(new String[0]);
}
if (fieldNames.length < 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
}
mlt.setFieldNames(fieldNames);
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
if (boost && boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
for (BooleanClause clause : boostedMLTQuery) {
Query q = clause.getQuery();
float originalBoost = 1f;
if (q instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) q;
q = bq.getQuery();
originalBoost = bq.getBoost();
}
Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
newQ.add(q, clause.getOccur());
}
boostedMLTQuery = newQ.build();
}
// exclude current document from results
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
return realMLTQuery.build();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage());
}
}
use of org.apache.lucene.search.BooleanClause in project elasticsearch by elastic.
the class SimpleQueryParser method newPossiblyAnalyzedQuery.
/**
* Analyze the given string using its analyzer, constructing either a
* {@code PrefixQuery} or a {@code BooleanQuery} made up
* of {@code TermQuery}s and {@code PrefixQuery}s
*/
private Query newPossiblyAnalyzedQuery(String field, String termStr) {
List<List<BytesRef>> tlist = new ArrayList<>();
// get Analyzer from superclass and tokenize the term
try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) {
source.reset();
List<BytesRef> currentPos = new ArrayList<>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
boolean hasMoreTokens = source.incrementToken();
while (hasMoreTokens) {
if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
tlist.add(currentPos);
currentPos = new ArrayList<>();
}
final BytesRef term = getAnalyzer().normalize(field, termAtt.toString());
currentPos.add(term);
hasMoreTokens = source.incrementToken();
}
if (currentPos.isEmpty() == false) {
tlist.add(currentPos);
}
} catch (IOException e) {
// ignore
// TODO: we should not ignore the exception and return a prefix query with the original term ?
}
} catch (IOException e) {
// Bail on any exceptions, going with a regular prefix query
return new PrefixQuery(new Term(field, termStr));
}
if (tlist.size() == 0) {
return null;
}
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
}
// build a boolean query with prefix on the last position only.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (int pos = 0; pos < tlist.size(); pos++) {
List<BytesRef> plist = tlist.get(pos);
boolean isLastPos = (pos == tlist.size() - 1);
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = new PrefixQuery(new Term(field, plist.get(0)));
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)));
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
Term[] terms = new Term[plist.size()];
for (int i = 0; i < plist.size(); i++) {
terms[i] = new Term(field, plist.get(i));
}
posQuery = new SynonymQuery(terms);
} else {
BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
for (BytesRef token : plist) {
innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD));
}
posQuery = innerBuilder.setDisableCoord(true).build();
}
builder.add(new BooleanClause(posQuery, getDefaultOperator()));
}
return builder.build();
}
use of org.apache.lucene.search.BooleanClause in project elasticsearch by elastic.
the class MapperQueryParser method getFuzzyQuery.
protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
return getFuzzyQuerySingle(fields.iterator().next(), termStr, minSimilarity);
}
if (settings.useDisMax()) {
List<Query> queries = new ArrayList<>();
boolean added = false;
for (String mField : fields) {
Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity);
if (q != null) {
added = true;
queries.add(applyBoost(mField, q));
}
}
if (!added) {
return null;
}
return new DisjunctionMaxQuery(queries, settings.tieBreaker());
} else {
List<BooleanClause> clauses = new ArrayList<>();
for (String mField : fields) {
Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity);
if (q != null) {
clauses.add(new BooleanClause(applyBoost(mField, q), BooleanClause.Occur.SHOULD));
}
}
return getBooleanQueryCoordDisabled(clauses);
}
} else {
return getFuzzyQuerySingle(field, termStr, minSimilarity);
}
}
Aggregations