Use of org.apache.lucene.search.BooleanClause in the Apache lucene-solr project.
Class SolrQueryParserBase, method getBooleanQuery.
/**
 * Factory method for generating query, given a set of clauses.
 * By default creates a boolean query composed of clauses passed in.
 *
 * <p>Optional ({@code SHOULD}) {@link RawQuery} clauses are grouped per field when either the
 * field is not indexed, or the parser runs with {@code QParser.FLAG_FILTER} and the total raw
 * term count exceeds {@code TERMS_QUERY_THRESHOLD}. Each group is later turned into either
 * individual {@code SHOULD} clauses or a single set query obtained from the field type.
 * All other clauses are added to the boolean query as-is (raw queries are first converted
 * with {@code rawToNormal}).
 *
 * Can be overridden by extending classes, to modify query being
 * returned.
 *
 * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances
 *                to join.
 *
 * @return Resulting {@link org.apache.lucene.search.Query} object, or {@code null} when
 *         {@code clauses} is empty. A boolean query holding a single {@code SHOULD} clause
 *         is unwrapped and the inner query is returned directly.
 * @throws SyntaxError declared by this method; presumably propagated from the helpers that
 *         convert raw queries into field queries (not visible here)
 */
protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
  if (clauses.size() == 0) {
    // all clause words were filtered away by the analyzer.
    return null;
  }

  SchemaField sfield = null;
  List<RawQuery> fieldValues = null;

  // First pass: count the terms carried by raw queries and note whether anything else is present.
  boolean onlyRawQueries = true;
  int allRawQueriesTermCount = 0;
  for (BooleanClause clause : clauses) {
    if (clause.getQuery() instanceof RawQuery) {
      allRawQueriesTermCount += ((RawQuery) clause.getQuery()).getTermCount();
    } else {
      onlyRawQueries = false;
    }
  }

  boolean useTermsQuery = (flags & QParser.FLAG_FILTER) != 0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;

  BooleanQuery.Builder booleanBuilder = newBooleanQuery();
  Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();

  // Second pass: add ordinary clauses to the builder and collect optional raw queries per field.
  for (BooleanClause clause : clauses) {
    Query subq = clause.getQuery();
    if (subq instanceof RawQuery) {
      if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
        // We only collect optional terms for set queries. Since this isn't optional,
        // convert the raw query to a normal query and handle as usual.
        clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
      } else {
        // Optional raw query.
        RawQuery rawq = (RawQuery) subq;

        // only look up fmap and type info on a field change
        if (sfield != rawq.sfield) {
          sfield = rawq.sfield;
          fieldValues = fmap.get(sfield);
          // Collect this value if the field isn't indexed (which bypasses the "useTermsQuery"
          // check), or if it is indexed and we want to use a set query.
          // Only create the list when none exists yet for this field: replacing an existing
          // list would silently drop values collected earlier when clauses for different
          // fields are interleaved (e.g. a:1 b:2 a:3 with "a" not indexed).
          if (fieldValues == null && (useTermsQuery || !sfield.indexed())) {
            fieldValues = new ArrayList<>(2);
            fmap.put(sfield, fieldValues);
          }
        }

        if (fieldValues != null) {
          // Deferred: handled per field after this loop.
          fieldValues.add(rawq);
          continue;
        }

        clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
      }
    }

    booleanBuilder.add(clause);
  }

  // Third pass: turn every per-field group into boolean clauses or a single set query.
  for (Map.Entry<SchemaField, List<RawQuery>> entry : fmap.entrySet()) {
    sfield = entry.getKey();
    fieldValues = entry.getValue();
    FieldType ft = sfield.getType();

    // TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
    int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
    if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
      // use boolean query instead
      for (RawQuery rawq : fieldValues) {
        Query subq;
        if (ft.isTokenized() && sfield.indexed()) {
          boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
          boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
          subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
          booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
        } else {
          for (String externalVal : rawq.getExternalVals()) {
            subq = ft.getFieldQuery(this.parser, sfield, externalVal);
            booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
          }
        }
      }
    } else {
      List<String> externalVals = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
      Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
      // if this is everything, don't wrap in a boolean query
      if (onlyRawQueries && termCount == allRawQueriesTermCount) {
        return subq;
      }
      booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
    }
  }

  BooleanQuery bq = booleanBuilder.build();
  if (bq.clauses().size() == 1) {
    // Unwrap single SHOULD query
    BooleanClause clause = bq.clauses().iterator().next();
    if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
      return clause.getQuery();
    }
  }
  return bq;
}
Use of org.apache.lucene.search.BooleanClause in the Apache lucene-solr project.
Class TestMoreLikeThis, method testMultiFieldShouldReturnPerFieldBooleanQuery.
/**
 * Indexes 300 "it" shop documents and 301 "clothes" shop documents, runs MoreLikeThis on the
 * last "clothes" document over both the {@code FOR_SALE} and {@code NOT_FOR_SALE} fields, and
 * expects the generated boolean query to contain a {@code SHOULD} term clause for every item
 * of that document in the field it was listed under.
 */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")
public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception {
  IndexReader reader = null;
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  try {
    int maxQueryTerms = 25;

    String[] itShopItemForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" };
    String[] itShopItemNotForSale = new String[] { "tie", "trousers", "shoes", "skirt", "hat" };
    String[] clothesShopItemForSale = new String[] { "tie", "trousers", "shoes", "skirt", "hat" };
    String[] clothesShopItemNotForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" };

    // add series of shop docs
    final int inputDocId;
    // try-with-resources: the writer is closed even when indexing or getReader() throws
    try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale);
      }
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      }
      // Input Document is a clothes shop
      inputDocId = addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      reader = writer.getReader();
    }

    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { FOR_SALE, NOT_FOR_SALE });

    // perform MLT query
    BooleanQuery query = (BooleanQuery) mlt.like(inputDocId);
    Collection<BooleanClause> clauses = query.clauses();

    // one expected SHOULD clause per (field, item) pair of the input document
    Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<>();
    for (String itemForSale : clothesShopItemForSale) {
      BooleanClause booleanClause = new BooleanClause(new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD);
      expectedClothesShopClauses.add(booleanClause);
    }
    for (String itemNotForSale : clothesShopItemNotForSale) {
      BooleanClause booleanClause = new BooleanClause(new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD);
      expectedClothesShopClauses.add(booleanClause);
    }

    for (BooleanClause expectedClause : expectedClothesShopClauses) {
      assertTrue(clauses.contains(expectedClause));
    }
  } finally {
    // clean up
    if (reader != null) {
      reader.close();
    }
    dir.close();
    analyzer.close();
  }
}
Use of org.apache.lucene.search.BooleanClause in the Apache lucene-solr project.
Class TestMoreLikeThis, method testMultiValues.
// LUCENE-5725
/**
 * Feeds four readers for the "text" field to MoreLikeThis (keyword-tokenized) and expects the
 * resulting boolean query to hold exactly two term clauses, each for either "lucene" or
 * "apache" in field "text".
 */
public void testMultiValues() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  // try-with-resources: the analyzer is closed even when an assertion below fails
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)) {
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });

    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene"), new StringReader("lucene release"), new StringReader("apache"), new StringReader("apache lucene"));
    Collection<BooleanClause> clauses = query.clauses();
    assertEquals("Expected 2 clauses only!", 2, clauses.size());

    // built once instead of on every loop iteration
    Collection<Term> expectedTerms = Arrays.asList(new Term("text", "lucene"), new Term("text", "apache"));
    for (BooleanClause clause : clauses) {
      Term term = ((TermQuery) clause.getQuery()).getTerm();
      assertTrue(expectedTerms.contains(term));
    }
  }
}
Use of org.apache.lucene.search.BooleanClause in the Apache lucene-solr project.
Class TestMoreLikeThis, method testBoostFactor.
/**
 * Checks that the configured boost factor multiplies every term boost: each clause produced
 * with a boost factor of 5 must carry the boost recorded by {@code getOriginalValues()} for
 * the same term, times 5 (within a 0.0001 tolerance).
 */
public void testBoostFactor() throws Throwable {
  Map<String, Float> originalValues = getOriginalValues();

  MoreLikeThis mlt = new MoreLikeThis(reader);
  // try-with-resources: the analyzer is closed even when an assertion below fails
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    mlt.setBoost(true);

    // this mean that every term boost factor will be multiplied by this
    // number
    float boostFactor = 5;
    mlt.setBoostFactor(boostFactor);

    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    assertEquals("Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size());

    for (BooleanClause clause : clauses) {
      // each clause is expected to be a BoostQuery wrapping a TermQuery
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();

      Float termBoost = originalValues.get(tq.getTerm().text());
      assertNotNull("Expected term " + tq.getTerm().text(), termBoost);

      float totalBoost = termBoost * boostFactor;
      assertEquals("Expected boost of " + totalBoost + " for term '" + tq.getTerm().text() + "' got " + bq.getBoost(), totalBoost, bq.getBoost(), 0.0001);
    }
  }
}
Use of org.apache.lucene.search.BooleanClause in the Apache lucene-solr project.
Class TestMoreLikeThis, method getOriginalValues.
/**
 * Runs MoreLikeThis with boosting enabled (and no explicit boost factor set) on the text
 * "lucene release" for field "text" and records the boost of every generated term clause.
 *
 * @return map from term text to the boost of its clause
 * @throws IOException declared by this method; presumably raised by the MoreLikeThis query
 *         generation or by closing the analyzer
 */
private Map<String, Float> getOriginalValues() throws IOException {
  Map<String, Float> originalValues = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  // try-with-resources: the analyzer is closed even when like() or a cast below throws
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    mlt.setBoost(true);

    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    for (BooleanClause clause : clauses) {
      // each clause is expected to be a BoostQuery wrapping a TermQuery
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();
      originalValues.put(tq.getTerm().text(), bq.getBoost());
    }
    return originalValues;
  }
}
Aggregations