Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
In the class TestIndexWriterExceptions, the method testUpdateDocsNonAbortingException:
public void testUpdateDocsNonAbortingException() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final int numDocs1 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs1; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }

  // Use addDocuments (no exception) to get docs in the index:
  final List<Document> docs = new ArrayList<>();
  final int numDocs2 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs2; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("subid", "subs", Field.Store.NO));
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
  }
  w.addDocuments(docs);

  final int numDocs3 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs3; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }

  docs.clear();
  final int limit = TestUtil.nextInt(random(), 2, 25);
  final int crashAt = random().nextInt(limit);
  for (int docCount = 0; docCount < limit; docCount++) {
    Document doc = new Document();
    docs.add(doc);
    doc.add(newStringField("id", docCount + "", Field.Store.NO));
    doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
    if (docCount == crashAt) {
      Field f = newTextField("crash", "", Field.Store.NO);
      doc.add(f);
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setReader(new StringReader("crash me on the 4th token"));
      // disable workflow checking as we forcefully close() in exceptional cases
      tokenizer.setEnableChecks(false);
      f.setTokenStream(new CrashingFilter("crash", tokenizer));
    }
  }

  IOException expected = expectThrows(IOException.class, () -> {
    w.updateDocuments(new Term("subid", "subs"), docs);
  });
  assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());

  final int numDocs4 = random().nextInt(25);
  for (int docCount = 0; docCount < numDocs4; docCount++) {
    Document doc = new Document();
    doc.add(newTextField("content", "good content", Field.Store.NO));
    w.addDocument(doc);
  }

  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);
  PhraseQuery pq = new PhraseQuery("content", "silly", "content");
  assertEquals(numDocs2, s.search(pq, 1).totalHits);
  pq = new PhraseQuery("content", "good", "content");
  assertEquals(numDocs1 + numDocs3 + numDocs4, s.search(pq, 1).totalHits);
  r.close();
  dir.close();
}
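For readers who want the PhraseQuery checks at the end of the test in isolation, here is a minimal standalone sketch of the same index-then-verify pattern, assuming Lucene 6.x-era APIs (varargs PhraseQuery constructor, RAMDirectory). The class name and document contents are illustrative, not taken from the test above.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PhraseQuerySketch {
  public static void main(String[] args) throws IOException {
    try (Directory dir = new RAMDirectory()) {
      try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        Document doc = new Document();
        doc.add(new TextField("content", "good content", Field.Store.NO));
        w.addDocument(doc);
      } // closing the writer commits the document

      try (DirectoryReader r = DirectoryReader.open(dir)) {
        IndexSearcher s = new IndexSearcher(r);
        // Terms must occur adjacently and in order for an exact phrase match.
        PhraseQuery pq = new PhraseQuery("content", "good", "content");
        System.out.println(s.search(pq, 10).totalHits); // expect 1
      }
    }
  }
}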
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
In the class ShingleAnalyzerWrapperTest, the method testShingleAnalyzerWrapperPhraseQuery:
/*
 * This shows how to construct a phrase query containing shingles.
 */
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
    int j = -1;
    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      j += posIncrAtt.getPositionIncrement();
      String termText = termAtt.toString();
      builder.add(new Term("content", termText), j);
    }
    ts.end();
  }
  PhraseQuery q = builder.build();
  ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
  int[] ranks = new int[] { 0 };
  compareRanks(hits, ranks);
}
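The loop above accumulates getPositionIncrement() into j so that a shingle and the unigram it starts on are added at the same position. A minimal hand-built equivalent, assuming the wrapped shingle analyzer emits unigrams plus bigrams for the text "this sentence" (the term values here are illustrative):

// "this" and the shingle "this sentence" start at position 0; "sentence" at 1.
// PhraseQuery.Builder accepts multiple terms at the same position.
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.add(new Term("content", "this"), 0);
builder.add(new Term("content", "this sentence"), 0); // shingle, position increment 0
builder.add(new Term("content", "sentence"), 1);
PhraseQuery q = builder.build();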
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
In the class SolrPluginUtilsTest, the method testDisjunctionMaxQueryParser:
@Test
public void testDisjunctionMaxQueryParser() throws Exception {
  Query out;
  String t;
  SolrQueryRequest req = req("df", "text");
  QParser qparser = QParser.getParser("hi", "dismax", req);
  DisjunctionMaxQueryParser qp = new SolrPluginUtils.DisjunctionMaxQueryParser(qparser, req.getParams().get("df"));
  qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts("title^2.0 title_stemmed name^1.2 subject^0.5"));
  qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
  qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts("subject^0.5 sind^1.5"));

  /* first some sanity tests that don't use aliasing at all */
  t = "XXXXXXXX";
  out = qp.parse(t);
  assertNotNull(t + " sanity test gave back null", out);
  assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
  assertEquals(t + " sanity test is wrong field", qp.getDefaultField(), ((TermQuery) out).getTerm().field());

  t = "subject:XXXXXXXX";
  out = qp.parse(t);
  assertNotNull(t + " sanity test gave back null", out);
  assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
  assertEquals(t + " sanity test is wrong field", "subject", ((TermQuery) out).getTerm().field());

  /* the field has an untokenized type, so this should be a term anyway */
  t = "sind:\"simple phrase\"";
  out = qp.parse(t);
  assertNotNull(t + " sanity test gave back null", out);
  assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
  assertEquals(t + " sanity test is wrong field", "sind", ((TermQuery) out).getTerm().field());

  t = "subject:\"simple phrase\"";
  out = qp.parse(t);
  assertNotNull(t + " sanity test gave back null", out);
  assertTrue(t + " sanity test isn't PhraseQuery: " + out.getClass(), out instanceof PhraseQuery);
  assertEquals(t + " sanity test is wrong field", "subject", ((PhraseQuery) out).getTerms()[0].field());

  /* now some tests that use aliasing */

  /* basic usage of a single "term" */
  t = "hoss:XXXXXXXX";
  out = qp.parse(t);
  assertNotNull(t + " was null", out);
  assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
  assertEquals(t + " wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) out).iterator()));

  /* an odd case, but it should still work: a DMQ of one clause */
  t = "test:YYYYY";
  out = qp.parse(t);
  assertNotNull(t + " was null", out);
  assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
  assertEquals(t + " wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) out).iterator()));

  /* basic usage of multiple "terms" */
  t = "hoss:XXXXXXXX test:YYYYY";
  out = qp.parse(t);
  assertNotNull(t + " was null", out);
  assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
  {
    BooleanQuery bq = (BooleanQuery) out;
    List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
    assertEquals(t + " wrong number of clauses", 2, clauses.size());
    Query sub = clauses.get(0).getQuery();
    assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
    assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
    sub = clauses.get(1).getQuery();
    assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
    assertEquals(t + " second had wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) sub).iterator()));
  }

  /* a phrase, and a term that is a stop word for some fields */
  t = "hoss:\"XXXXXX YYYYY\" hoss:the";
  out = qp.parse(t);
  assertNotNull(t + " was null", out);
  assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
  {
    BooleanQuery bq = (BooleanQuery) out;
    List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
    assertEquals(t + " wrong number of clauses", 2, clauses.size());
    Query sub = clauses.get(0).getQuery();
    assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
    assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
    sub = clauses.get(1).getQuery();
    assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
    assertEquals(t + " second had wrong number of clauses (stop words)", 2, countItems(((DisjunctionMaxQuery) sub).iterator()));
  }
}
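To make the expected clause counts concrete: the hoss alias fans a single pseudo-field out to four real fields, so each parsed "term" becomes a DisjunctionMaxQuery with four clauses. Below is a hedged sketch of roughly the structure "hoss:XXXXXXXX" expands to, assuming Lucene 6.x-era APIs where DisjunctionMaxQuery takes a Collection<Query> plus a tie-breaker, and assuming boosted fields are wrapped in BoostQuery; the exact wrapping is an inference, not taken from the test.

import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// One TermQuery per aliased field, boosts as configured in addAlias(),
// combined so the best-scoring field dominates (tie-breaker 0.01).
Query hoss = new DisjunctionMaxQuery(
    Arrays.asList(
        new BoostQuery(new TermQuery(new Term("title", "XXXXXXXX")), 2.0f),
        new TermQuery(new Term("title_stemmed", "XXXXXXXX")),
        new BoostQuery(new TermQuery(new Term("name", "XXXXXXXX")), 1.2f),
        new BoostQuery(new TermQuery(new Term("subject", "XXXXXXXX")), 0.5f)),
    0.01f);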
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
In the class TestSpanSearchEquivalence, the method testSpanNearVersusPhrase:
/** SpanNearQuery([A, B], 0, true) = "A B" */
public void testSpanNearVersusPhrase() throws Exception {
  Term t1 = randomTerm();
  Term t2 = randomTerm();
  SpanQuery[] subquery = new SpanQuery[] { spanQuery(new SpanTermQuery(t1)), spanQuery(new SpanTermQuery(t2)) };
  SpanQuery q1 = spanQuery(new SpanNearQuery(subquery, 0, true));
  PhraseQuery q2 = new PhraseQuery(t1.field(), t1.bytes(), t2.bytes());
  if (t1.equals(t2)) {
    assertSameSet(q1, q2);
  } else {
    assertSameScores(q1, q2);
  }
}
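The same equivalence with concrete terms instead of randomTerm(); the field and term values below are illustrative:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

Term a = new Term("body", "hello");
Term b = new Term("body", "world");

// Ordered span query with slop 0: "hello" immediately followed by "world".
SpanQuery spans = new SpanNearQuery(
    new SpanQuery[] { new SpanTermQuery(a), new SpanTermQuery(b) }, 0, true);

// Exact two-term phrase over the same field; matches the same documents.
PhraseQuery phrase = new PhraseQuery(a.field(), a.bytes(), b.bytes());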
Use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
In the class MissesTest, the method testPhraseQuery:
public void testPhraseQuery() throws IOException, InvalidTokenOffsetsException {
  try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
    final PhraseQuery query = new PhraseQuery("test", "foo", "bar");
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    assertEquals("this is a <B>foo</B> <B>bar</B> example", highlighter.getBestFragment(analyzer, "test", "this is a foo bar example"));
    assertNull(highlighter.getBestFragment(analyzer, "test", "this does not match"));
  }
}
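The <B>…</B> tags in the assertion come from SimpleHTMLFormatter's defaults. A small variant using the SimpleHTMLFormatter(String, String) overload that takes pre- and post-tags, assuming an analyzer already in scope as in the test:

PhraseQuery query = new PhraseQuery("test", "foo", "bar");
// Emphasis tags instead of the default <B>/</B>.
Highlighter highlighter =
    new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), new QueryScorer(query));
String best = highlighter.getBestFragment(analyzer, "test", "this is a foo bar example");
// best: "this is a <em>foo</em> <em>bar</em> example"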