use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project lucene-solr by apache.
the class TestUnifiedHighlighterStrictPhrases method testFilteredOutSpan.
/**
 * Highlights a document with an unordered span-near query (slop 3) made of the wildcard
 * {@code free*} and the term {@code speech}. The expected snippet marks only "freedom" and
 * "speech"; "freezing" also matches the wildcard but is expected to stay unhighlighted.
 */
public void testFilteredOutSpan() throws IOException {
    indexWriter.addDocument(newDoc("freezing cold stuff like stuff freedom of speech"));
    initReaderSearcherHighlighter();

    // Clause 1: every term starting with "free", exposed as a span query.
    SpanMultiTermQueryWrapper<WildcardQuery> freePrefixSpans =
            new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "free*")));
    // Clause 2: the exact term "speech".
    SpanTermQuery speechSpan = new SpanTermQuery(new Term("body", "speech"));

    SpanQuery[] nearClauses = { freePrefixSpans, speechSpan };
    SpanQuery nearQuery = new SpanNearQuery(nearClauses, 3, false);

    BooleanQuery.Builder booleanBuilder = new BooleanQuery.Builder();
    booleanBuilder.add(nearQuery, BooleanClause.Occur.MUST);
    BooleanQuery query = booleanBuilder.build();

    TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
    String[] actualSnippets = highlighter.highlight("body", query, hits);

    String[] expectedSnippets = { "freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>" };
    assertArrayEquals(expectedSnippets, actualSnippets);
}
use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project SearchServices by Alfresco.
the class Solr4QueryParser method generateSpanOrQuery.
/**
 * Builds one span query per token sequence and combines the results.
 * <p>
 * For each sequence:
 * <ul>
 * <li>If position increments are enabled and {@code isAllTokensSequentiallyShifted} reports
 * true, the tokens become a single ordered {@link SpanNearQuery} with slop 0 (or the lone
 * query when there is only one token).</li>
 * <li>Otherwise, with position increments enabled, tokens are chained pairwise into ordered or
 * unordered {@link SpanNearQuery} instances; tokens collected while {@code gap == 0} are
 * grouped into a {@link SpanOrQuery} before being chained.</li>
 * <li>With position increments disabled, all tokens are nested into {@link SpanOrQuery}
 * instances.</li>
 * </ul>
 * Terms containing {@code *} or {@code ?} are wrapped through {@code wrapWildcardTerms}.
 *
 * @param field
 *            the field every generated term is created for
 * @param fixedTokenSequences
 *            LinkedList&lt;LinkedList&lt;PackedTokenAttributeImpl&gt;&gt; - the alternative
 *            token sequences
 * @return the query of the only sequence when exactly one was processed, otherwise a
 *         {@link SpanOrQuery} over the queries of all sequences
 */
protected SpanQuery generateSpanOrQuery(String field, LinkedList<LinkedList<PackedTokenAttributeImpl>> fixedTokenSequences) {
    List<SpanQuery> spanOrQueryParts = new ArrayList<SpanQuery>();
    for (LinkedList<PackedTokenAttributeImpl> tokenSequence : fixedTokenSequences) {
        // Holds the position increment of the token handled in the previous iteration.
        int gap = 1;
        SpanQuery spanQuery = null;
        // Queries collected while gap == 0; flushed as one unit on the next non-zero gap.
        List<SpanQuery> atSamePositionSpanOrQueryParts = new ArrayList<SpanQuery>();

        if (getEnablePositionIncrements() && isAllTokensSequentiallyShifted(tokenSequence)) {
            // create flat nearQuery - there will be no tokens at same position
            List<SpanQuery> wildWrappedList = new ArrayList<SpanQuery>(tokenSequence.size());
            for (PackedTokenAttributeImpl token : tokenSequence) {
                wildWrappedList.add(wrapWildcardTerms(new Term(field, token.toString())));
            }
            if (wildWrappedList.size() == 1) {
                spanQuery = wildWrappedList.get(0);
            } else {
                spanQuery = new SpanNearQuery(wildWrappedList.toArray(new SpanQuery[wildWrappedList.size()]), 0, true);
            }
        } else {
            // Iterate directly: indexed get(i) on a LinkedList walks the list on every call.
            for (PackedTokenAttributeImpl nextToken : tokenSequence) {
                Term term = new Term(field, nextToken.toString());
                SpanQuery nextSpanQuery = wrapWildcardTerms(term);

                if (!getEnablePositionIncrements()) {
                    // Positions are ignored: accumulate every token into nested SpanOrQuery instances.
                    spanQuery = (spanQuery == null)
                            ? new SpanOrQuery(nextSpanQuery)
                            : new SpanOrQuery(spanQuery, nextSpanQuery);
                    continue;
                }

                if (gap == 0) {
                    atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                } else if (atSamePositionSpanOrQueryParts.isEmpty()) {
                    spanQuery = chainWithGap(spanQuery, nextSpanQuery, gap);
                } else {
                    // Flush the pending group first; the current token starts the next group.
                    spanQuery = chainWithGap(spanQuery, mergeSamePositionParts(atSamePositionSpanOrQueryParts), gap);
                    atSamePositionSpanOrQueryParts.clear();
                    atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                }
                gap = nextToken.getPositionIncrement();
            }
        }

        if (!atSamePositionSpanOrQueryParts.isEmpty()) {
            spanQuery = chainWithGap(spanQuery, mergeSamePositionParts(atSamePositionSpanOrQueryParts), gap);
        }
        // NOTE(review): spanQuery is still null here when the loop above saw no tokens; it is
        // added unchanged to preserve the existing behaviour - confirm callers never pass an
        // empty sequence.
        spanOrQueryParts.add(spanQuery);
    }

    if (spanOrQueryParts.size() == 1) {
        return spanOrQueryParts.get(0);
    }
    return new SpanOrQuery(spanOrQueryParts.toArray(new SpanQuery[0]));
}

/** Returns the only element of a one-element list, otherwise a SpanOrQuery over all elements. */
private SpanQuery mergeSamePositionParts(List<SpanQuery> samePositionParts) {
    if (samePositionParts.size() == 1) {
        return samePositionParts.get(0);
    }
    return new SpanOrQuery(samePositionParts.toArray(new SpanQuery[0]));
}

/** Returns {@code tail} when there is no {@code head} yet, otherwise a SpanNearQuery joining both using {@code gap} and internalSlop. */
private SpanQuery chainWithGap(SpanQuery head, SpanQuery tail, int gap) {
    if (head == null) {
        return tail;
    }
    return new SpanNearQuery(new SpanQuery[] { head, tail }, (gap - 1) + internalSlop, internalSlop < 2);
}
use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project SearchServices by Alfresco.
the class Solr4QueryParser method wrapWildcardTerms.
/**
 * Turns a term into a span query.
 * <p>
 * A term whose text contains {@code *} or {@code ?} becomes a wildcard query wrapped in a
 * {@link SpanMultiTermQueryWrapper} whose rewrite method is a
 * {@code TopTermsSpanBooleanQueryRewrite} built from {@code topTermSpanRewriteLimit}. Any other
 * term becomes a plain {@link SpanTermQuery}.
 *
 * @param term
 *            the term to convert
 * @return the span query for the term
 */
private SpanQuery wrapWildcardTerms(org.apache.lucene.index.Term term) {
    String text = term.text();
    boolean hasWildcard = (text != null) && (text.contains("*") || text.contains("?"));
    if (!hasWildcard) {
        return new SpanTermQuery(term);
    }

    SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery> wildcardSpans =
            new SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery>(
                    new org.apache.lucene.search.WildcardQuery(term));
    wildcardSpans.setRewriteMethod(new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
    return wildcardSpans;
}
use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project Krill by KorAP.
the class TestMultipleDistanceIndex method testQueryWithWildCard.
/**
 * Searches a wildcard query wrapped in a {@link SpanMultiTermQueryWrapper}, first on its own and
 * then as the first operand of a {@link SpanMultipleDistanceQuery}, and finally repeats the
 * distance search after three more documents were added to the index.
 */
@Test
public void testQueryWithWildCard() throws IOException {
    // meine* /+w1:2,s0 &Erfahrung
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc5());
    ki.commit();

    // Check simple rewriting
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:Meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq = new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
    // Expected value goes first so a failure message reports expected/actual correctly.
    assertEquals("tokens:s:Meine*", wcquery.toString());
    kr = ki.search(mtq, (short) 10);
    assertEquals(4, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(1, kr.getMatch(0).getEndPos());

    // Check rewriting in multidistance query
    SpanQuery sq = new SpanTermQuery(new Term("tokens", "l:Erfahrung"));
    kr = ki.search(sq, (short) 10);
    assertEquals(4, kr.getMatches().size());

    // Two constraints: "w" with range 1:2 and "s" with range 0:0 (see the toString check below).
    List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
    constraints.add(createConstraint("w", 1, 2, true, false));
    constraints.add(createConstraint("tokens", "s", 0, 0, true, false));
    SpanQuery mdsq = new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
    assertEquals("spanMultipleDistance(SpanMultiTermQueryWrapper(tokens:s:Meine*), " + "tokens:l:Erfahrung, [(w[1:2], ordered, notExcluded), (s[0:0], " + "ordered, notExcluded)])", mdsq.toString());
    kr = ki.search(mdsq, (short) 10);
    assertEquals(3, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());

    // Check skipping with multiple documents
    ki.addDoc(createFieldDoc6());
    ki.addDoc(createFieldDoc7());
    ki.addDoc(createFieldDoc8());
    ki.commit();
    kr = ki.search(mdsq, (short) 10);
    assertEquals(6, kr.getMatches().size());
}
use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project Krill by KorAP.
the class TestRegexIndex method testWildcardPlusRewritten.
/**
 * Runs a multiple-distance query whose first operand is the regular expression
 * {@code s:meine.?} wrapped as a span query, and expects four matches.
 */
@Test
public void testWildcardPlusRewritten() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // C2 meine+ /+w1:2,s0 &Erfahrung
    // meine+ rewritten into meine.?
    RegexpQuery rewrittenPlus = new RegexpQuery(new Term("tokens", "s:meine.?"));
    SpanMultiTermQueryWrapper<RegexpQuery> mtq = new SpanMultiTermQueryWrapper<RegexpQuery>(rewrittenPlus);

    // Both operands carry the same class number.
    // NOTE(review): sq and constraints are not declared in this method - presumably fields of the test class; confirm.
    SpanClassQuery firstOperand = new SpanClassQuery(mtq, (byte) 129);
    SpanClassQuery secondOperand = new SpanClassQuery(sq, (byte) 129);
    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(firstOperand, secondOperand, constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(4, kr.getMatches().size());
}
Aggregations