use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.
the class SynonymTokenizer method testGetBestFragmentsFilteredQuery.
public void testGetBestFragmentsFilteredQuery() throws Exception {
TestHighlightRunner helper = new TestHighlightRunner() {
@Override
public void run() throws Exception {
numHighlights = 0;
SpanQuery[] clauses = { new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy")) };
SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(snq, Occur.MUST);
bq.add(TermRangeQuery.newStringRange("contents", "john", "john", true, true), Occur.FILTER);
doSearching(bq.build());
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
// Currently highlights "John" and "Kennedy" separately
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
};
helper.start();
}
use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.
the class HighlighterPhraseTest method testConcurrentSpan.
public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox jumped";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "fox")), new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
indexSearcher.search(phraseQuery, new SimpleCollector() {
private int baseDoc;
@Override
public void collect(int i) {
bitset.set(this.baseDoc + i);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.baseDoc = context.docBase;
}
@Override
public void setScorer(org.apache.lucene.search.Scorer scorer) {
// Do Nothing
}
@Override
public boolean needsScores() {
return false;
}
});
assertEquals(1, bitset.cardinality());
final int maxDoc = indexReader.maxDoc();
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
for (int position = bitset.nextSetBit(0); position < maxDoc - 1; position = bitset.nextSetBit(position + 1)) {
assertEquals(0, position);
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(position), -1);
assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
}
} finally {
indexReader.close();
directory.close();
}
}
use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.
the class MultiTermHighlighting method extractAutomata.
/**
* Extracts MultiTermQueries that match the provided field predicate.
* Returns equivalent automata that will match terms.
*/
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
// TODO Lucene needs a Query visitor API! LUCENE-3041
List<CharacterRunAutomaton> list = new ArrayList<>();
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof BoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanBoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
if (fieldMatcher.test(prefix.field())) {
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {
@Override
public String toString() {
return pq.toString();
}
});
}
} else if (query instanceof FuzzyQuery) {
final FuzzyQuery fq = (FuzzyQuery) query;
if (fieldMatcher.test(fq.getField())) {
String utf16 = fq.getTerm().text();
int[] termText = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
termText[j++] = cp = utf16.codePointAt(i);
}
int termLength = termText.length;
int prefixLength = Math.min(fq.getPrefixLength(), termLength);
String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
list.add(new CharacterRunAutomaton(automaton) {
@Override
public String toString() {
return fq.toString();
}
});
}
} else if (query instanceof TermRangeQuery) {
final TermRangeQuery tq = (TermRangeQuery) query;
if (fieldMatcher.test(tq.getField())) {
final CharsRef lowerBound;
if (tq.getLowerTerm() == null) {
lowerBound = null;
} else {
lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
}
final CharsRef upperBound;
if (tq.getUpperTerm() == null) {
upperBound = null;
} else {
upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
}
final boolean includeLower = tq.includesLower();
final boolean includeUpper = tq.includesUpper();
final CharsRef scratch = new CharsRef();
@SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
// this is *not* an automaton, but its very simple
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
@Override
public boolean run(char[] s, int offset, int length) {
scratch.chars = s;
scratch.offset = offset;
scratch.length = length;
if (lowerBound != null) {
int cmp = comparator.compare(scratch, lowerBound);
if (cmp < 0 || (!includeLower && cmp == 0)) {
return false;
}
}
if (upperBound != null) {
int cmp = comparator.compare(scratch, upperBound);
if (cmp > 0 || (!includeUpper && cmp == 0)) {
return false;
}
}
return true;
}
@Override
public String toString() {
return tq.toString();
}
});
}
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (fieldMatcher.test(aq.getField())) {
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return aq.toString();
}
});
}
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.
the class SynonymTokenizer method testNearSpanSimpleQuery.
public void testNearSpanSimpleQuery() throws Exception {
doSearching(new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD_NAME, "beginning")), new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
TestHighlightRunner helper = new TestHighlightRunner() {
@Override
public void run() throws Exception {
mode = QUERY;
doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
}
};
helper.run();
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2);
}
use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.
the class SpanNearBuilder method getSpanQuery.
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
String slopString = DOMUtils.getAttributeOrFail(e, "slop");
int slop = Integer.parseInt(slopString);
boolean inOrder = DOMUtils.getAttribute(e, "inOrder", false);
List<SpanQuery> spans = new ArrayList<>();
for (Node kid = e.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
if (kid.getNodeType() == Node.ELEMENT_NODE) {
spans.add(factory.getSpanQuery((Element) kid));
}
}
SpanQuery[] spanQueries = spans.toArray(new SpanQuery[spans.size()]);
SpanQuery snq = new SpanNearQuery(spanQueries, slop, inOrder);
float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
return new SpanBoostQuery(snq, boost);
}
Aggregations