use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.
public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
// query.add(new SpanTermQuery(new Term(FIELD, "the")));
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
TopDocs hits = indexSearcher.search(phraseQuery, 1);
assertEquals(1, hits.totalHits);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
} finally {
indexReader.close();
directory.close();
}
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class PayloadSpanUtil method queryToSpanQuery.
private void queryToSpanQuery(Query query, Collection<byte[]> payloads) throws IOException {
if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
queryToSpanQuery(clause.getQuery(), payloads);
}
}
} else if (query instanceof PhraseQuery) {
Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
int slop = ((PhraseQuery) query).getSlop();
boolean inorder = false;
if (slop == 0) {
inorder = true;
}
SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
getPayloads(payloads, sp);
} else if (query instanceof TermQuery) {
SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
getPayloads(payloads, stq);
} else if (query instanceof SpanQuery) {
getPayloads(payloads, (SpanQuery) query);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext(); ) {
queryToSpanQuery(iterator.next(), payloads);
}
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings({ "rawtypes", "unchecked" }) final List<Query>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.length; ++i) {
final Term[] termArray = termArrays[i];
List<Query> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (final Term term : termArray) {
disjuncts.add(new SpanTermQuery(term));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List<Query> disjuncts = disjunctLists[i];
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
getPayloads(payloads, sp);
}
}
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterStrictPhrases method testMtq.
/**
* Like {@link #testRewriteAndMtq} but no rewrite.
*/
public void testMtq() throws IOException {
indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
initReaderSearcherHighlighter();
SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("body", "bravo")), // does NOT rewrite
new SpanTermQuery(new Term("body", "charlie")) }, 0, true);
BooleanQuery query = new BooleanQuery.Builder().add(snq, BooleanClause.Occur.MUST).add(new PrefixQuery(new Term("body", "al")), // MTQ
BooleanClause.Occur.MUST).add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST).add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD).build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>" }, snippets);
// do again, this time with MTQ disabled.
//disable but leave phrase processing enabled
highlighter.setHandleMultiTermQuery(false);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
snippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha" }, snippets);
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class BoostingTermBuilder method getSpanQuery.
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
String value = DOMUtils.getNonBlankTextOrFail(e);
SpanQuery btq = new PayloadScoreQuery(new SpanTermQuery(new Term(fieldName, value)), new AveragePayloadFunction());
btq = new SpanBoostQuery(btq, DOMUtils.getAttribute(e, "boost", 1.0f));
return btq;
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestQueryParser method testMultiWordSynonyms.
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testMultiWordSynonyms() throws Exception {
QueryParser dumb = new QueryParser("field", new Analyzer1());
dumb.setSplitOnWhitespace(false);
TermQuery guinea = new TermQuery(new Term("field", "guinea"));
TermQuery pig = new TermQuery(new Term("field", "pig"));
TermQuery cavy = new TermQuery(new Term("field", "cavy"));
// A multi-word synonym source will form a graph query for synonyms that formed the graph token stream
BooleanQuery.Builder synonym = new BooleanQuery.Builder();
synonym.add(guinea, BooleanClause.Occur.MUST);
synonym.add(pig, BooleanClause.Occur.MUST);
BooleanQuery guineaPig = synonym.build();
PhraseQuery phraseGuineaPig = new PhraseQuery.Builder().add(new Term("field", "guinea")).add(new Term("field", "pig")).build();
BooleanQuery graphQuery = new BooleanQuery.Builder().add(guineaPig, BooleanClause.Occur.SHOULD).add(cavy, BooleanClause.Occur.SHOULD).build();
assertEquals(graphQuery, dumb.parse("guinea pig"));
// With the phrase operator, a multi-word synonym source will form span near queries.
SpanNearQuery spanGuineaPig = SpanNearQuery.newOrderedNearQuery("field").addClause(new SpanTermQuery(new Term("field", "guinea"))).addClause(new SpanTermQuery(new Term("field", "pig"))).setSlop(0).build();
SpanTermQuery spanCavy = new SpanTermQuery(new Term("field", "cavy"));
SpanOrQuery spanPhrase = new SpanOrQuery(new SpanQuery[] { spanGuineaPig, spanCavy });
assertEquals(spanPhrase, dumb.parse("\"guinea pig\""));
// custom behavior, the synonyms are expanded, unless you use quote operator
QueryParser smart = new SmartQueryParser();
smart.setSplitOnWhitespace(false);
graphQuery = new BooleanQuery.Builder().add(guineaPig, BooleanClause.Occur.SHOULD).add(cavy, BooleanClause.Occur.SHOULD).build();
assertEquals(graphQuery, smart.parse("guinea pig"));
assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
}
Aggregations