Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by Apache.
From the class SolrPluginUtilsTest, method testDisjunctionMaxQueryParser:
@Test
public void testDisjunctionMaxQueryParser() throws Exception {
    Query out;
    String t;
    SolrQueryRequest req = req("df", "text");
    QParser qparser = QParser.getParser("hi", "dismax", req);
    DisjunctionMaxQueryParser qp = new SolrPluginUtils.DisjunctionMaxQueryParser(qparser, req.getParams().get("df"));
    qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts("title^2.0 title_stemmed name^1.2 subject^0.5"));
    qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
    qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts("subject^0.5 sind^1.5"));
    /* first some sanity tests that don't use aliasing at all */
    t = "XXXXXXXX";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
    assertEquals(t + " sanity test is wrong field", qp.getDefaultField(), ((TermQuery) out).getTerm().field());
    t = "subject:XXXXXXXX";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
    assertEquals(t + " sanity test is wrong field", "subject", ((TermQuery) out).getTerm().field());
    /* field has untokenized type, so this should be a term anyway */
t = "sind:\"simple phrase\"";
out = qp.parse(t);
assertNotNull(t + " sanity test gave back null", out);
assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
assertEquals(t + " sanity test is wrong field", "sind", ((TermQuery) out).getTerm().field());
t = "subject:\"simple phrase\"";
out = qp.parse(t);
assertNotNull(t + " sanity test gave back null", out);
assertTrue(t + " sanity test isn't PhraseQuery: " + out.getClass(), out instanceof PhraseQuery);
assertEquals(t + " sanity test is wrong field", "subject", ((PhraseQuery) out).getTerms()[0].field());
/* now some tests that use aliasing */
/* basic usage of single "term" */
t = "hoss:XXXXXXXX";
out = qp.parse(t);
assertNotNull(t + " was null", out);
assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
assertEquals(t + " wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) out).iterator()));
/* odd case, but should still work, DMQ of one clause */
t = "test:YYYYY";
out = qp.parse(t);
assertNotNull(t + " was null", out);
assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
assertEquals(t + " wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) out).iterator()));
/* basic usage of multiple "terms" */
t = "hoss:XXXXXXXX test:YYYYY";
out = qp.parse(t);
assertNotNull(t + " was null", out);
assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
{
BooleanQuery bq = (BooleanQuery) out;
List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
assertEquals(t + " wrong number of clauses", 2, clauses.size());
Query sub = clauses.get(0).getQuery();
assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
sub = clauses.get(1).getQuery();
assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
assertEquals(t + " second had wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) sub).iterator()));
}
/* a phrase, and a term that is a stop word for some fields */
t = "hoss:\"XXXXXX YYYYY\" hoss:the";
out = qp.parse(t);
assertNotNull(t + " was null", out);
assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
{
BooleanQuery bq = (BooleanQuery) out;
List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
assertEquals(t + " wrong number of clauses", 2, clauses.size());
Query sub = clauses.get(0).getQuery();
assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
sub = clauses.get(1).getQuery();
assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
assertEquals(t + " second had wrong number of clauses (stop words)", 2, countItems(((DisjunctionMaxQuery) sub).iterator()));
}
}
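As a rough illustration of what the alias expansion above produces for "hoss:XXXXXXXX", here is a minimal sketch that builds the equivalent DisjunctionMaxQuery by hand with the modern Lucene API (per-field boosts via BoostQuery, Lucene 6+). This is an assumption-laden approximation, not the parser's actual output: the real parser runs each field's analysis chain (stemming, stop words, case folding), which may change the terms.

import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class DisMaxAliasSketch {
    public static void main(String[] args) {
        // One disjunct per aliased field; boosts from "title^2.0 title_stemmed name^1.2 subject^0.5".
        Query title = new BoostQuery(new TermQuery(new Term("title", "XXXXXXXX")), 2.0f);
        Query stemmed = new TermQuery(new Term("title_stemmed", "XXXXXXXX"));
        Query name = new BoostQuery(new TermQuery(new Term("name", "XXXXXXXX")), 1.2f);
        Query subject = new BoostQuery(new TermQuery(new Term("subject", "XXXXXXXX")), 0.5f);
        // 0.01f mirrors the tie-breaker multiplier passed to addAlias("hoss", ...).
        DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
                Arrays.asList(title, stemmed, name, subject), 0.01f);
        // Prints something like: (title:XXXXXXXX^2.0 | title_stemmed:XXXXXXXX | ...)~0.01
        System.out.println(dmq);
    }
}

This is why the test expects 4 clauses: one disjunct per field in the "hoss" alias.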
Use of org.apache.lucene.search.PhraseQuery in project jackrabbit by Apache.
From the class AbstractExcerpt, method getQueryTerms:
private static void getQueryTerms(Query q, Set<Term[]> relevantTerms) {
    if (q instanceof BooleanQuery) {
        final BooleanQuery bq = (BooleanQuery) q;
        for (BooleanClause clause : bq.getClauses()) {
            getQueryTerms(clause.getQuery(), relevantTerms);
        }
        return;
    }
    // need to preserve insertion order
    Set<Term> extractedTerms = new LinkedHashSet<Term>();
    q.extractTerms(extractedTerms);
    Set<Term> filteredTerms = filterRelevantTerms(extractedTerms);
    if (!filteredTerms.isEmpty()) {
        if (q instanceof PhraseQuery) {
            // inline the terms, basically a 'must all' condition
            relevantTerms.add(filteredTerms.toArray(new Term[] {}));
        } else {
            // each possible term gets a new slot
            for (Term t : filteredTerms) {
                relevantTerms.add(new Term[] { t });
            }
        }
    }
}
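To make the grouping concrete: a PhraseQuery contributes all of its (filtered) terms as a single Term[] entry, a "must all appear" group for excerpt highlighting, while any other query contributes one single-term array per term. A hedged usage sketch against the pre-5.0 Lucene API this Jackrabbit code targets (the stated outcome assumes filterRelevantTerms, a private helper of AbstractExcerpt, passes both phrase terms through):

// Build a small query tree: one phrase clause plus one loose term.
BooleanQuery bq = new BooleanQuery();
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("text", "quick"));
pq.add(new Term("text", "fox"));
bq.add(pq, BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("text", "lazy")), BooleanClause.Occur.SHOULD);

Set<Term[]> relevant = new LinkedHashSet<Term[]>();
getQueryTerms(bq, relevant);
// Assuming filterRelevantTerms keeps everything, relevant now holds two
// entries: [text:quick, text:fox] (the "must all" group) and [text:lazy].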
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by Apache.
From the class TestAddIndexes, method testWithPendingDeletes2:
public void testWithPendingDeletes2() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();
    setUpDirs(dir, aux);
    IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
        doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
        doc.add(new IntPoint("doc", i));
        doc.add(new IntPoint("doc2d", i, i));
        doc.add(new NumericDocValuesField("dv", i));
        writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }
    writer.addIndexes(aux);
    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery("content", "bbb", "14");
    writer.deleteDocuments(q);
    writer.forceMerge(1);
    writer.commit();
    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);
    writer.close();
    dir.close();
    aux.close();
}
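A note on the construction above: new PhraseQuery("content", "bbb", "14") is the varargs convenience constructor (introduced around Lucene 5.3, when PhraseQuery became immutable). The equivalent PhraseQuery.Builder form, which additionally lets you set slop or explicit term positions, would look like this (a sketch, not part of the test):

PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.add(new Term("content", "bbb"));
builder.add(new Term("content", "14"));
builder.setSlop(0); // exact adjacency, same as the varargs form
PhraseQuery q = builder.build();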
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by Apache.
From the class TestAddIndexes, method testWithPendingDeletes:
public void testWithPendingDeletes() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();
    setUpDirs(dir, aux);
    IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    writer.addIndexes(aux);
    // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
        doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
        doc.add(new IntPoint("doc", i));
        doc.add(new IntPoint("doc2d", i, i));
        doc.add(new NumericDocValuesField("dv", i));
        writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }
    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery("content", "bbb", "14");
    writer.deleteDocuments(q);
    writer.forceMerge(1);
    writer.commit();
    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);
    writer.close();
    dir.close();
    aux.close();
}
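Because updateDocument keys on id = i % 10, only the last ten documents ("bbb 10" through "bbb 19") survive the loop, so the phrase "bbb 14" matches exactly one of them. A hedged sketch of checking the post-delete count directly, before dir is closed (illustrative only; the test uses its own verifyTermDocs helper, and this assumes the usual org.apache.lucene.index and .search imports):

try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    int bbbDocs = searcher.count(new TermQuery(new Term("content", "bbb")));
    // 10 surviving "bbb i" docs minus the one deleted by the phrase query.
    System.out.println("bbb docs after delete: " + bbbDocs); // expected 9
}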
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by Apache.
From the class TestIndexWriterExceptions, method testAddDocsNonAbortingException:
public void testAddDocsNonAbortingException() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs1 = random().nextInt(25);
    for (int docCount = 0; docCount < numDocs1; docCount++) {
        Document doc = new Document();
        doc.add(newTextField("content", "good content", Field.Store.NO));
        w.addDocument(doc);
    }
    final List<Document> docs = new ArrayList<>();
    for (int docCount = 0; docCount < 7; docCount++) {
        Document doc = new Document();
        docs.add(doc);
        doc.add(newStringField("id", docCount + "", Field.Store.NO));
        doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
        if (docCount == 4) {
            Field f = newTextField("crash", "", Field.Store.NO);
            doc.add(f);
            MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            tokenizer.setReader(new StringReader("crash me on the 4th token"));
            // disable workflow checking as we forcefully close() in exceptional cases.
            tokenizer.setEnableChecks(false);
            f.setTokenStream(new CrashingFilter("crash", tokenizer));
        }
    }
    IOException expected = expectThrows(IOException.class, () -> {
        w.addDocuments(docs);
    });
    assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());
    final int numDocs2 = random().nextInt(25);
    for (int docCount = 0; docCount < numDocs2; docCount++) {
        Document doc = new Document();
        doc.add(newTextField("content", "good content", Field.Store.NO));
        w.addDocument(doc);
    }
    final IndexReader r = w.getReader();
    w.close();
    final IndexSearcher s = newSearcher(r);
    PhraseQuery pq = new PhraseQuery("content", "silly", "good");
    assertEquals(0, s.search(pq, 1).totalHits);
    pq = new PhraseQuery("content", "good", "content");
    assertEquals(numDocs1 + numDocs2, s.search(pq, 1).totalHits);
    r.close();
    dir.close();
}
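The behavior under test is the atomicity of addDocuments(...): the CrashingFilter throws a non-aborting exception while the fifth document of the block is being indexed, so the entire seven-document block is rolled back, and the two PhraseQuery assertions confirm that only the numDocs1 + numDocs2 "good content" documents remain searchable. A hedged sketch of that all-or-nothing contract (illustrative values; assumes an open IndexWriter named writer and the standard org.apache.lucene.document imports):

List<Document> block = new ArrayList<>();
for (int i = 0; i < 3; i++) {
    Document d = new Document();
    d.add(new TextField("content", "silly content " + i, Field.Store.NO));
    block.add(d);
}
// addDocuments indexes the block as one atomic unit: if any document in it
// fails with a non-aborting exception, none of the block's docs are indexed.
writer.addDocuments(block);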