Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project by Apache.
From class SynonymTokenizer, method testRepeatingTermsInMultBooleans.
// LUCENE-1752
// Highlights field "f1" against a query of the shape
//   +(f1:"a b c d" f2:"a b c d") +(f1:"b c g" f2:"b c g")
// using this test instance as the formatter, then checks the highlight count
// recorded in numHighlights (a field maintained outside this method).
public void testRepeatingTermsInMultBooleans() throws Exception {
  final String text = "x y z a b c d e f g b c g";
  final String firstField = "f1";
  final String secondField = "f2";

  // First required clause: the phrase "a b c d" in either field.
  final BooleanQuery.Builder abcdInEitherField = new BooleanQuery.Builder();
  abcdInEitherField.add(new PhraseQuery(firstField, "a", "b", "c", "d"), Occur.SHOULD);
  abcdInEitherField.add(new PhraseQuery(secondField, "a", "b", "c", "d"), Occur.SHOULD);

  // Second required clause: the phrase "b c g" in either field.
  final BooleanQuery.Builder bcgInEitherField = new BooleanQuery.Builder();
  bcgInEitherField.add(new PhraseQuery(firstField, "b", "c", "g"), Occur.SHOULD);
  bcgInEitherField.add(new PhraseQuery(secondField, "b", "c", "g"), Occur.SHOULD);

  // Both clauses must match.
  final BooleanQuery.Builder combined = new BooleanQuery.Builder();
  combined.add(abcdInEitherField.build(), Occur.MUST);
  combined.add(bcgInEitherField.build(), Occur.MUST);

  // Score only against the first field.
  final QueryScorer fieldScorer = new QueryScorer(combined.build(), firstField);
  fieldScorer.setExpandMultiTermQuery(false);

  final Highlighter highlighter = new Highlighter(this, fieldScorer);
  final Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  // The returned fragment is not inspected; only the highlight counter is checked below.
  highlighter.getBestFragment(whitespaceAnalyzer, firstField, text);

  final boolean foundSevenHighlights = numHighlights == 7;
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", foundSevenHighlights);
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project by Apache.
From class SynonymTokenizer, method testHighlighterWithPhraseQuery.
// Builds a single-term PhraseQuery for "uchu" on field "substring" and hands it to
// assertHighlighting together with a <b>...</b> formatter and the strings "Buchung" and
// "B<b>uchu</b>ng" (presumably the input text and the expected highlighted output;
// confirm against assertHighlighting).
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
  final String field = "substring";
  final BytesRef[] phraseTerms = { new BytesRef("uchu") };
  final PhraseQuery substringPhrase = new PhraseQuery(field, phraseTerms);
  final SimpleHTMLFormatter boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
  assertHighlighting(substringPhrase, boldFormatter, "Buchung", "B<b>uchu</b>ng", field);
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project by Apache.
From class HighlighterPhraseTest, method testConcurrentSpan.
/**
 * Indexes a single document whose field is fed by a {@code TokenStreamConcurrent} with term
 * vectors (positions and offsets) enabled, matches it with an ordered, zero-slop span query
 * for "fox" followed by "jumped", and asserts that highlighting from the stored term vector
 * produces the same best fragment as highlighting from a fresh {@code TokenStreamConcurrent}.
 *
 * @throws IOException if indexing, searching or reading term vectors fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox jumped";
  // try-with-resources closes the directory even when indexing or an assertion fails.
  try (Directory directory = newDirectory()) {
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)))) {
      final Document document = new Document();
      // Not stored, but with full term vectors so a token stream can be rebuilt from the index.
      FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setStoreTermVectorOffsets(true);
      customType.setStoreTermVectorPositions(true);
      customType.setStoreTermVectors(true);
      document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
      indexWriter.addDocument(document);
    }
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      assertEquals(1, indexReader.numDocs());
      final IndexSearcher indexSearcher = newSearcher(indexReader);
      final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "fox")), new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
      // Records every matching document as a set bit at its top-level doc id.
      final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
      indexSearcher.search(phraseQuery, new SimpleCollector() {
        private int baseDoc;

        @Override
        public void collect(int i) {
          // i is relative to the current leaf; add the leaf's docBase to get the top-level id.
          bitset.set(this.baseDoc + i);
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
          this.baseDoc = context.docBase;
        }

        @Override
        public void setScorer(org.apache.lucene.search.Scorer scorer) {
          // Do Nothing
        }

        @Override
        public boolean needsScores() {
          return false;
        }
      });
      assertEquals(1, bitset.cardinality());
      final int maxDoc = indexReader.maxDoc();
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
      // Visit every matching doc id below maxDoc. A loop bound of maxDoc - 1 would skip the
      // last document and, with the single document indexed here, would skip the comparison
      // entirely. nextSetBit is only asked for position + 1 while that is still a valid index
      // into the bitset (which was sized to maxDoc above).
      int position = bitset.nextSetBit(0);
      while (position < maxDoc) {
        assertEquals(0, position);
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(position), -1);
        assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
        position = (position + 1 < maxDoc) ? bitset.nextSetBit(position + 1) : maxDoc;
      }
    }
  }
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project by Apache.
From class HighlighterPhraseTest, method testStopWords.
// Shows the need to sum the increments in WeightedSpanTermExtractor.
// Indexes TEXT with an analyzer configured with MockTokenFilter.ENGLISH_STOPSET, searches
// with a PhraseQuery whose terms sit at explicit positions 0, 3 and 7, and asserts that the
// document is both found and highlighted as exactly one fragment.
public void testStopWords() throws IOException, InvalidTokenOffsetsException {
  MockAnalyzer stopAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  final String TEXT = "the ab the the cd the the the ef the";
  // try-with-resources closes the directory even if indexing or an assertion fails.
  try (Directory directory = newDirectory()) {
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(stopAnalyzer))) {
      final Document document = new Document();
      document.add(newTextField(FIELD, TEXT, Store.YES));
      indexWriter.addDocument(document);
    }
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      assertEquals(1, indexReader.numDocs());
      final IndexSearcher indexSearcher = newSearcher(indexReader);
      //equivalent of "ab the the cd the the the ef"
      // The gaps between positions 0, 3 and 7 match the number of "the" tokens between the terms.
      final PhraseQuery phraseQuery = new PhraseQuery.Builder()
          .add(new Term(FIELD, "ab"), 0)
          .add(new Term(FIELD, "cd"), 3)
          .add(new Term(FIELD, "ef"), 7)
          .build();
      TopDocs hits = indexSearcher.search(phraseQuery, 100);
      assertEquals(1, hits.totalHits);
      final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
      assertEquals(1, highlighter.getBestFragments(stopAnalyzer, FIELD, TEXT, 10).length);
    }
  }
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project by Apache.
From class SynonymTokenizer, method testHighlightingWithDefaultField.
// Exercises highlightField with a two-term PhraseQuery (slop 3) in two situations:
// the query targets FIELD_NAME while a differently named field is highlighted, and the
// query targets the field "text" while FIELD_NAME is highlighted.
public void testHighlightingWithDefaultField() throws Exception {
  String s1 = "I call our world Flatland, not because we call it so,";
  // Verify that a query against the default field results in text being
  // highlighted
  // regardless of the field name.
  PhraseQuery q = new PhraseQuery(3, FIELD_NAME, "world", "flatland");
  String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
  String observed = highlightField(q, "SOME_FIELD_NAME", s1);
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  assertEquals("Query in the default field results in text for *ANY* field being highlighted", expected, observed);
  // Verify that a query against a named field does not result in any
  // highlighting
  // when the query field name differs from the name of the field being
  // highlighted,
  // which in this example happens to be the default field name.
  q = new PhraseQuery(3, "text", "world", "flatland");
  expected = s1;
  observed = highlightField(q, FIELD_NAME, s1);
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
  }
  // Assert on the values computed (and optionally printed) above instead of highlighting again.
  assertEquals("Query in a named field does not result in highlighting when that field isn't in the query", expected, observed);
}
Aggregations