use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class SynonymTokenizer method testSimpleSpanFragmenter.
/**
 * Runs two phrase searches and highlights every hit using a {@link SimpleSpanFragmenter},
 * first with a fragment size of 5 and then with a fragment size of 20.
 *
 * <p>The method contains no assertions on the highlighted text: it passes as long as
 * searching and fragmenting complete without throwing. The fragments are printed when
 * {@code VERBOSE} is set.
 *
 * @throws Exception if searching, analysis or highlighting fails
 */
public void testSimpleSpanFragmenter() throws Exception {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term(FIELD_NAME, "piece"), 0);
  builder.add(new Term(FIELD_NAME, "text"), 2);
  builder.add(new Term(FIELD_NAME, "very"), 5);
  builder.add(new Term(FIELD_NAME, "long"), 6);
  PhraseQuery phraseQuery = builder.build();
  doSearching(phraseQuery);

  int maxNumFragmentsRequired = 2;

  // NOTE(review): 'query' and 'hits' are presumably populated by doSearching(...) — confirm.
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);

  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String text = doc.get(FIELD_NAME);
    TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);

    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));

    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE) {
      System.out.println("\t" + result);
    }
  }

  phraseQuery = new PhraseQuery(FIELD_NAME, "been", "shot");

  // Search with the phrase that was just built. Passing the old 'query' here would
  // simply repeat the first search and leave the "been shot" phrase unused.
  doSearching(phraseQuery);

  maxNumFragmentsRequired = 2;

  scorer = new QueryScorer(query, FIELD_NAME);
  highlighter = new Highlighter(this, scorer);

  for (int i = 0; i < hits.totalHits; i++) {
    String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
    // This pass re-analyzes the stored text instead of calling getAnyTokenStream(...).
    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);

    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));

    String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
    if (VERBOSE) {
      System.out.println("\t" + result);
    }
  }
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class SynonymTokenizer method testSimpleQueryScorerPhraseHighlighting.
/**
 * Runs three positional phrase queries, highlights every hit with a {@link QueryScorer}
 * and a {@link SimpleFragmenter} of size 40, and checks the {@code numHighlights}
 * counter after each pass (expected 3, 4 and 4).
 *
 * <p>NOTE(review): {@code numHighlights} is not modified directly in this method other
 * than being reset to 0 between passes; it is presumably incremented by the formatter
 * callback of this test class — confirm.
 *
 * @throws Exception if searching, analysis or highlighting fails
 */
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
  final int fragmentLimit = 2;

  // Pass 1: "very long" followed by "contains" two positions later.
  PhraseQuery phrase = new PhraseQuery.Builder()
      .add(new Term(FIELD_NAME, "very"), 0)
      .add(new Term(FIELD_NAME, "long"), 1)
      .add(new Term(FIELD_NAME, "contains"), 3)
      .build();
  doSearching(phrase);

  QueryScorer queryScorer = new QueryScorer(query, FIELD_NAME);
  Highlighter hl = new Highlighter(this, queryScorer);
  for (int hit = 0; hit < hits.totalHits; hit++) {
    final int id = hits.scoreDocs[hit].doc;
    String stored = searcher.doc(id).get(FIELD_NAME);
    TokenStream tokens = getAnyTokenStream(FIELD_NAME, id);
    hl.setTextFragmenter(new SimpleFragmenter(40));
    String fragments = hl.getBestFragments(tokens, stored, fragmentLimit, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragments);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 3);

  // Pass 2: four terms at positions 1, 3, 4 and 6.
  numHighlights = 0;
  phrase = new PhraseQuery.Builder()
      .add(new Term(FIELD_NAME, "piece"), 1)
      .add(new Term(FIELD_NAME, "text"), 3)
      .add(new Term(FIELD_NAME, "refers"), 4)
      .add(new Term(FIELD_NAME, "kennedy"), 6)
      .build();
  doSearching(phrase);

  queryScorer = new QueryScorer(query, FIELD_NAME);
  hl = new Highlighter(this, queryScorer);
  for (int hit = 0; hit < hits.totalHits; hit++) {
    final int id = hits.scoreDocs[hit].doc;
    String stored = searcher.doc(id).get(FIELD_NAME);
    TokenStream tokens = getAnyTokenStream(FIELD_NAME, id);
    hl.setTextFragmenter(new SimpleFragmenter(40));
    String fragments = hl.getBestFragments(tokens, stored, fragmentLimit, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragments);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);

  // Pass 3: the same term repeated every fourth position.
  numHighlights = 0;
  phrase = new PhraseQuery.Builder()
      .add(new Term(FIELD_NAME, "lets"), 0)
      .add(new Term(FIELD_NAME, "lets"), 4)
      .add(new Term(FIELD_NAME, "lets"), 8)
      .add(new Term(FIELD_NAME, "lets"), 12)
      .build();
  doSearching(phrase);

  queryScorer = new QueryScorer(query, FIELD_NAME);
  hl = new Highlighter(this, queryScorer);
  for (int hit = 0; hit < hits.totalHits; hit++) {
    final int id = hits.scoreDocs[hit].doc;
    String stored = searcher.doc(id).get(FIELD_NAME);
    TokenStream tokens = getAnyTokenStream(FIELD_NAME, id);
    hl.setTextFragmenter(new SimpleFragmenter(40));
    String fragments = hl.getBestFragments(tokens, stored, fragmentLimit, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragments);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestMemoryIndexAgainstRAMDir method testSameFieldAddedMultipleTimes.
/**
 * Adds two values to the same field of a {@link MemoryIndex} and checks phrase matching
 * across the boundary between them, first with the analyzer as constructed and then
 * with a random position increment gap of 1 to 10.
 *
 * <p>Expectations encoded below: the exact phrase "fox jumps" scores above 0.1 before
 * the gap is set and scores 0 afterwards, while the same phrase with slop 10 still
 * matches once the gap is in place.
 *
 * @throws IOException declared by the signature; presumably raised by index or reader
 *     access — confirm
 */
public void testSameFieldAddedMultipleTimes() throws IOException {
  final String field = "field";
  final String firstValue = "the quick brown fox";
  final String secondValue = "jumps over the";

  MemoryIndex index = randomMemoryIndex();
  MockAnalyzer mock = new MockAnalyzer(random());

  index.addField(field, firstValue, mock);
  index.addField(field, secondValue, mock);

  LeafReader leaf = (LeafReader) index.createSearcher().getIndexReader();
  TestUtil.checkReader(leaf);
  // Four tokens from the first value plus three from the second.
  assertEquals(7, leaf.terms(field).getSumTotalTermFreq());

  PhraseQuery exactPhrase = new PhraseQuery(field, "fox", "jumps");
  float exactScore = index.search(exactPhrase);
  assertTrue(exactScore > 0.1);

  // Rebuild the same content, this time with a gap configured on the analyzer.
  index.reset();
  int gap = 1 + random().nextInt(10);
  mock.setPositionIncrementGap(gap);
  index.addField(field, firstValue, mock);
  index.addField(field, secondValue, mock);

  float exactScoreWithGap = index.search(exactPhrase);
  assertEquals(0, exactScoreWithGap, 0.00001f);

  PhraseQuery sloppyPhrase = new PhraseQuery(10, field, "fox", "jumps");
  String gapMessage = "posGap" + mock.getPositionIncrementGap(field);
  float sloppyScore = index.search(sloppyPhrase);
  assertTrue(gapMessage, sloppyScore > 0.0001);

  TestUtil.checkReader(index.createSearcher().getIndexReader());
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestMemoryIndex method testBuildFromDocument.
/**
 * Builds a {@link MemoryIndex} from a document holding two values of {@code field1}
 * (added as {@link TextField}) and one value of {@code field2} (added as
 * {@link StringField}), then checks the scores of term and phrase queries against it.
 */
@Test
public void testBuildFromDocument() {
  Document document = new Document();
  document.add(new TextField("field1", "some text", Field.Store.NO));
  document.add(new TextField("field1", "some more text", Field.Store.NO));
  document.add(new StringField("field2", "untokenized text", Field.Store.NO));

  analyzer.setPositionIncrementGap(100);

  MemoryIndex index = MemoryIndex.fromDocument(document, analyzer);

  // Term queries: field1 matches a single word, field2 only matches its whole value.
  TermQuery wordInField1 = new TermQuery(new Term("field1", "text"));
  assertThat(index.search(wordInField1), not(0.0f));

  TermQuery wordInField2 = new TermQuery(new Term("field2", "text"));
  assertThat(index.search(wordInField2), is(0.0f));

  TermQuery wholeValueInField2 = new TermQuery(new Term("field2", "untokenized text"));
  assertThat(index.search(wholeValueInField2), not(0.0f));

  // Phrase queries on field1: phrases inside one value are expected to match.
  PhraseQuery withinSecondValue = new PhraseQuery("field1", "some", "more", "text");
  assertThat(index.search(withinSecondValue), not(0.0f));

  PhraseQuery withinFirstValue = new PhraseQuery("field1", "some", "text");
  assertThat(index.search(withinFirstValue), not(0.0f));

  // "text some" would have to span the two values; the test expects no match
  // (presumably because of the position increment gap of 100 set above — confirm).
  PhraseQuery acrossValues = new PhraseQuery("field1", "text", "some");
  assertThat(index.search(acrossValues), is(0.0f));
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestQPHelper method testSimple.
/**
 * Checks a range of simple query strings against their expected rendered form and,
 * for a few of them, against the expected {@code Query} class.
 *
 * <p>NOTE(review): {@code assertQueryEquals(query, analyzer, expected)} and
 * {@code getQuery(query, analyzer)} are defined elsewhere; a {@code null} analyzer
 * presumably selects the test's default analyzer — confirm.
 *
 * @throws Exception if parsing any of the query strings fails
 */
public void testSimple() throws Exception {
  // Plain terms, sloppy phrases and repeated terms.
  assertQueryEquals("field=a", null, "a");
  assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
  assertQueryEquals("term term term", null, "term term term");

  // Non-ASCII terms (u with diaeresis, U+00FC). Written as Unicode escapes so the
  // literals do not depend on the encoding the source file is read with.
  assertQueryEquals("t\u00fcrm term term", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), "t\u00fcrm term term");
  assertQueryEquals("\u00fcmlaut", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), "\u00fcmlaut");

  // FIXME: change MockAnalyzer to not extend CharTokenizer for this test
  //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
  //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");

  // Boolean operators in their keyword and symbolic spellings.
  assertQueryEquals("a AND b", null, "+a +b");
  assertQueryEquals("(a AND b)", null, "+a +b");
  assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
  assertQueryEquals("a AND NOT b", null, "+a -b");
  assertQueryEquals("a AND -b", null, "+a -b");
  assertQueryEquals("a AND !b", null, "+a -b");
  assertQueryEquals("a && b", null, "+a +b");
  assertQueryEquals("a && ! b", null, "+a -b");
  assertQueryEquals("a OR b", null, "a b");
  assertQueryEquals("a || b", null, "a b");
  assertQueryEquals("a OR !b", null, "a -b");
  assertQueryEquals("a OR ! b", null, "a -b");
  assertQueryEquals("a OR -b", null, "a -b");

  // Required/prohibited prefixes, explicit fields and phrases.
  assertQueryEquals("+term -term term", null, "+term -term term");
  assertQueryEquals("foo:term AND field:anotherTerm", null, "+foo:term +anotherterm");
  assertQueryEquals("term AND \"phrase phrase\"", null, "+term +\"phrase phrase\"");
  assertQueryEquals("\"hello there\"", null, "\"hello there\"");

  // The parsed query must be of the expected concrete type.
  assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
  assertTrue(getQuery("hello", null) instanceof TermQuery);
  assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);

  // Boosts on terms, groups and phrases.
  assertQueryEquals("germ term^2.0", null, "germ (term)^2.0");
  assertQueryEquals("(term)^2.0", null, "(term)^2.0");
  assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
  assertQueryEquals("term^2.0", null, "(term)^2.0");
  assertQueryEquals("term^2", null, "(term)^2.0");
  assertQueryEquals("\"germ term\"^2.0", null, "(\"germ term\")^2.0");
  assertQueryEquals("\"term germ\"^2", null, "(\"term germ\")^2.0");

  // Nested groups combined with boolean operators and fields.
  assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, "+(foo bar) +(baz boo)");
  assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d");
  assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, "+(apple \"steve jobs\") -(foo bar baz)");
  assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, "+(title:dog title:cat) -author:\"bob dole\"");
}
Aggregations