Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project (Apache):
class MultiFieldQueryParser, method applySlop.
/**
 * Applies the given slop to {@code q} when it is a {@link PhraseQuery} or
 * {@link MultiPhraseQuery}; any other query type is returned unchanged.
 * Both query types are immutable, so a new query is built via the
 * corresponding Builder when the slop actually needs to change.
 *
 * @param q    the query to adjust
 * @param slop the slop (permitted edit distance between phrase terms) to apply
 * @return a query equivalent to {@code q} but carrying the requested slop
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof PhraseQuery) {
    PhraseQuery pq = (PhraseQuery) q;
    // Rebuild only when the slop differs, mirroring the guard used in the
    // MultiPhraseQuery branch below (the original rebuilt unconditionally).
    if (slop != pq.getSlop()) {
      PhraseQuery.Builder builder = new PhraseQuery.Builder();
      builder.setSlop(slop);
      // Copy terms together with their original positions so that any
      // position gaps (e.g. from removed stopwords) are preserved.
      org.apache.lucene.index.Term[] terms = pq.getTerms();
      int[] positions = pq.getPositions();
      for (int i = 0; i < terms.length; ++i) {
        builder.add(terms[i], positions[i]);
      }
      q = builder.build();
    }
  } else if (q instanceof MultiPhraseQuery) {
    MultiPhraseQuery mpq = (MultiPhraseQuery) q;
    if (slop != mpq.getSlop()) {
      q = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
    }
  }
  return q;
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project (Apache):
class HighlighterPhraseTest, method testSparseSpan.
/**
 * Highlights a SpanNearQuery ("did" immediately before "jump") over text where
 * those terms are not adjacent, so the search yields zero hits. Verifies that
 * highlighting from the stored term vector produces the same fragment as
 * highlighting from a freshly created token stream.
 */
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  // try-with-resources (consistent with testInOrderWithStopWords) guarantees
  // the writer is closed before the reader is opened.
  try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)))) {
    final Document document = new Document();
    // Term vectors with positions and offsets are required so the highlighter
    // can rebuild a TokenStream from the stored vector below.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamSparse(), customType));
    indexWriter.addDocument(document);
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    // Slop 0, in order: "did" and "jump" must be adjacent — they are not.
    final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "did")), new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits);
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
    // Token stream reconstructed from the term vector of doc 0.
    final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    // Both token-stream sources must yield the same best fragment.
    assertEquals(highlighter.getBestFragment(new TokenStreamSparse(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project (Apache):
class HighlighterPhraseTest, method testInOrderWithStopWords.
//shows the need to require inOrder if getSlop() == 0, not if final slop == 0
//in WeightedSpanTermExtractor
/**
 * Indexes text containing both "ab the cd" and the reversed "cd the ab", then
 * highlights a slop-2 phrase query equivalent to "ab the cd" ("the" being a
 * stopword that leaves a position gap). Only the in-order occurrence may be
 * highlighted — see the note above about WeightedSpanTermExtractor requiring
 * inOrder when getSlop() == 0.
 */
public void testInOrderWithStopWords() throws IOException, InvalidTokenOffsetsException {
  MockAnalyzer stopAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  final String TEXT = "the cd the ab the the the the the the the ab the cd the";
  final Directory directory = newDirectory();
  try (IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(stopAnalyzer))) {
    final Document doc = new Document();
    doc.add(newTextField(FIELD, TEXT, Store.YES));
    writer.addDocument(doc);
  }
  try (IndexReader reader = DirectoryReader.open(directory)) {
    assertEquals(1, reader.numDocs());
    final IndexSearcher searcher = newSearcher(reader);
    //equivalent of "ab the cd"
    final PhraseQuery phraseQuery = new PhraseQuery.Builder()
        .add(new Term(FIELD, "ab"), 0)
        .add(new Term(FIELD, "cd"), 2)
        .build();
    TopDocs topDocs = searcher.search(phraseQuery, 100);
    assertEquals(1, topDocs.totalHits);
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
    String[] fragments = highlighter.getBestFragments(stopAnalyzer, FIELD, TEXT, 10);
    assertEquals(1, fragments.length);
    // The forward phrase must be highlighted; the reversed one must not be.
    assertTrue("contains <B>ab</B> the <B>cd</B>", fragments[0].contains("<B>ab</B> the <B>cd</B>"));
    assertFalse("does not contain <B>cd</B> the <B>ab</B>", fragments[0].contains("<B>cd</B> the <B>ab</B>"));
  } finally {
    directory.close();
  }
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project (Apache):
class HighlighterPhraseTest, method testConcurrentPhrase.
/**
 * Highlights an exact phrase query "fox jumped" that matches the indexed text.
 * Verifies that highlighting from the stored term vector produces the same
 * fragment as highlighting from a freshly created token stream.
 */
public void testConcurrentPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox jumped";
  final Directory directory = newDirectory();
  // try-with-resources (consistent with testInOrderWithStopWords) guarantees
  // the writer is closed before the reader is opened.
  try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)))) {
    final Document document = new Document();
    // Term vectors with positions and offsets are required so the highlighter
    // can rebuild a TokenStream from the stored vector below.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
    indexWriter.addDocument(document);
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "fox", "jumped");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits);
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
    // Token stream reconstructed from the term vector of doc 0.
    final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    // Both token-stream sources must yield the same best fragment.
    assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
Use of org.apache.lucene.search.PhraseQuery in the lucene-solr project (Apache):
class SynonymTokenizer (test), method testSimpleQueryScorerPhraseHighlighting2.
/**
 * Runs a slop-5 phrase query for "text piece long" and highlights each hit.
 * Relies on instance state set up by doSearching() (query, searcher, hits)
 * and on the formatter incrementing numHighlights; expects exactly 6
 * highlighted terms across all hits.
 */
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
  PhraseQuery phraseQuery = new PhraseQuery(5, FIELD_NAME, "text", "piece", "long");
  doSearching(phraseQuery);
  int maxNumFragmentsRequired = 2;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(this, scorer);
  highlighter.setTextFragmenter(new SimpleFragmenter(40));
  // Index loop bounded by totalHits, matching how doSearching() collects docs.
  for (int i = 0; i < hits.totalHits; i++) {
    final int docId = hits.scoreDocs[i].doc;
    final Document doc = searcher.doc(docId);
    String fieldText = doc.get(FIELD_NAME);
    TokenStream stream = getAnyTokenStream(FIELD_NAME, docId);
    String fragments = highlighter.getBestFragments(stream, fieldText, maxNumFragmentsRequired, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragments);
    }
  }
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 6);
}
Aggregations