Example usage of org.apache.lucene.search.spans.SpanQuery from the Apache lucene-solr project — class SynonymTokenizer, method testGetBestFragmentsFilteredQuery:
/**
 * Verifies that highlighting a BooleanQuery combining a SpanNearQuery (MUST)
 * with a TermRangeQuery used as a FILTER clause still highlights the span
 * terms. Currently "John" and "Kennedy" are highlighted separately.
 */
public void testGetBestFragmentsFilteredQuery() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      numHighlights = 0;
      // "john" within 1 position of "kennedy", in order.
      SpanQuery[] clauses = { new SpanTermQuery(new Term("contents", "john")),
          new SpanTermQuery(new Term("contents", "kennedy")) };
      SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      bq.add(snq, Occur.MUST);
      // FILTER clause restricts matches but must not add its own highlights.
      bq.add(TermRangeQuery.newStringRange("contents", "john", "john", true, true), Occur.FILTER);
      doSearching(bq.build());
      doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
      // Currently highlights "John" and "Kennedy" separately.
      // assertEquals reports expected vs. actual on failure, unlike
      // assertTrue(msg, x == y) which only reports the message.
      assertEquals("Failed to find correct number of highlights", 2, numHighlights);
    }
  };
  helper.start();
}
Example usage of org.apache.lucene.search.spans.SpanQuery from the Apache lucene-solr project — class HighlighterPhraseTest, method testConcurrentSpan:
// Indexes one document whose tokens come from TokenStreamConcurrent, queries
// it with a SpanNearQuery, and checks that highlighting from the stored term
// vector equals highlighting from a freshly-created token stream.
public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox jumped";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
// Term vectors with positions and offsets are required so TokenSources can
// rebuild a TokenStream from the stored vector below.
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
// "fox" immediately followed by "jumped" (slop 0, in order).
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "fox")), new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
// Collect all matching doc ids into the bitset; scores are not needed.
indexSearcher.search(phraseQuery, new SimpleCollector() {
private int baseDoc;
@Override
public void collect(int i) {
// i is segment-relative; rebase to a global doc id.
bitset.set(this.baseDoc + i);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.baseDoc = context.docBase;
}
@Override
public void setScorer(org.apache.lucene.search.Scorer scorer) {
// Do Nothing
}
@Override
public boolean needsScores() {
return false;
}
});
assertEquals(1, bitset.cardinality());
final int maxDoc = indexReader.maxDoc();
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
// NOTE(review): with maxDoc == 1 the bound `position < maxDoc - 1` becomes
// `position < 0`, so this loop body appears never to execute — confirm the
// bound is intended (vs. `position < maxDoc` guarded against NO_MORE_DOCS).
for (int position = bitset.nextSetBit(0); position < maxDoc - 1; position = bitset.nextSetBit(position + 1)) {
assertEquals(0, position);
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(position), -1);
// Highlighting via the term-vector stream must match highlighting via a
// fresh TokenStreamConcurrent over the same text.
assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
}
} finally {
indexReader.close();
directory.close();
}
}
Example usage of org.apache.lucene.search.spans.SpanQuery from the Apache lucene-solr project — class WeightedSpanTermExtractor, method extractWeightedSpanTerms:
/**
 * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>SpanQuery</code>,
 * attaching the span match positions so highlighting can be position-sensitive.
 *
 * @param terms
 * Map to place created WeightedSpanTerms in
 * @param spanQuery
 * SpanQuery to extract Terms from
 * @param boost weight assigned to each extracted term
 * @throws IOException If there is a low-level I/O error
 */
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery, float boost) throws IOException {
// Determine which fields to collect span positions for: every field the
// query references, or only the configured fieldName if one is set.
Set<String> fieldNames;
if (fieldName == null) {
fieldNames = new HashSet<>();
collectSpanQueryFields(spanQuery, fieldNames);
} else {
fieldNames = new HashSet<>(1);
fieldNames.add(fieldName);
}
// To support the use of the default field name
if (defaultField != null) {
fieldNames.add(defaultField);
}
Map<String, SpanQuery> queries = new HashMap<>();
Set<Term> nonWeightedTerms = new HashSet<>();
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
// One-shot searcher over the single leaf context; disable the query cache
// since nothing is reused.
final IndexSearcher searcher = new IndexSearcher(getLeafContext());
searcher.setQueryCache(null);
if (mustRewriteQuery) {
// Rewrite and remember the result per field so the spans phase below uses
// the same rewritten query. NOTE(review): the rewrite call does not appear
// to depend on `field` — presumably kept per-field for safety; verify.
for (final String field : fieldNames) {
final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
queries.put(field, rewrittenQuery);
rewrittenQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
}
} else {
spanQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
}
// Collect every span match position across all relevant fields.
List<PositionSpan> spanPositions = new ArrayList<>();
for (final String field : fieldNames) {
final SpanQuery q;
if (mustRewriteQuery) {
q = queries.get(field);
} else {
q = spanQuery;
}
LeafReaderContext context = getLeafContext();
SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false);
Bits acceptDocs = context.reader().getLiveDocs();
final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
if (spans == null) {
// NOTE(review): this exits the whole method, skipping any remaining
// fields — confirm bailing out entirely (rather than `continue`) is
// intended.
return;
}
// collect span positions
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
// Skip deleted documents.
if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
continue;
}
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
// endPosition() is exclusive; PositionSpan stores an inclusive end.
spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
}
}
}
if (spanPositions.size() == 0) {
// no spans found
return;
}
// Attach the collected positions to a WeightedSpanTerm for every extracted
// term belonging to one of the target fields.
for (final Term queryTerm : nonWeightedTerms) {
if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
if (weightedSpanTerm == null) {
weightedSpanTerm = new WeightedSpanTerm(boost, queryTerm.text());
weightedSpanTerm.addPositionSpans(spanPositions);
weightedSpanTerm.positionSensitive = true;
terms.put(queryTerm.text(), weightedSpanTerm);
} else {
// Term already present (e.g. from an earlier extraction): merge in the
// new positions.
if (spanPositions.size() > 0) {
weightedSpanTerm.addPositionSpans(spanPositions);
}
}
}
}
}
Example usage of org.apache.lucene.search.spans.SpanQuery from the Apache lucene-solr project — class MemoryIndexOffsetStrategy, method buildCombinedAutomaton:
/**
 * Builds one {@link CharacterRunAutomaton} accepting any term the query might
 * match: the union of the extracted terms, the supplied automata, and automata
 * extracted from the strict-phrase span queries.
 */
private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher, BytesRef[] terms, CharacterRunAutomaton[] automata, PhraseHelper strictPhrases, Function<Query, Collection<Query>> multiTermQueryRewrite) {
  final List<CharacterRunAutomaton> combined = new ArrayList<>();
  if (terms.length > 0) {
    combined.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
  }
  Collections.addAll(combined, automata);
  for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
    // true == also look inside the span query for multi-term sub-queries
    Collections.addAll(combined,
        MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));
  }
  // Single automaton: no aggregation needed.
  if (combined.size() == 1) {
    return combined.get(0);
  }
  // Aggregate automaton delegating to each member in turn. The makeEmpty()
  // base automaton is bogus and never consulted — only run() is overridden.
  return new CharacterRunAutomaton(Automata.makeEmpty()) {
    @Override
    public boolean run(char[] chars, int offset, int length) {
      // Indexed loop on purpose: avoids Iterator allocation per invocation.
      for (int i = 0; i < combined.size(); i++) {
        if (combined.get(i).run(chars, offset, length)) {
          return true;
        }
      }
      return false;
    }
  };
}
Example usage of org.apache.lucene.search.spans.SpanQuery from the Apache lucene-solr project — class SynonymTokenizer, method testNearSpanSimpleQuery:
/**
 * Highlights a SpanNearQuery ("beginning" within 3 positions of "kennedy",
 * any order) and verifies two highlights are produced.
 */
public void testNearSpanSimpleQuery() throws Exception {
  doSearching(new SpanNearQuery(new SpanQuery[] {
      new SpanTermQuery(new Term(FIELD_NAME, "beginning")),
      new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      mode = QUERY;
      doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
    }
  };
  helper.run();
  // assertEquals reports expected vs. actual on failure, unlike
  // assertTrue(msg, x == y) which only reports the message.
  assertEquals("Failed to find correct number of highlights", 2, numHighlights);
}
Aggregations