use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.
the class TokenSourcesTest method testOverlapWithOffsetExactPhrase.
/**
 * Indexes a single document whose field is fed by {@code OverlappingTokenStream}, storing term
 * vectors with offsets but without positions, and checks that the exact span phrase
 * "the fox" is highlighted as one fragment when the token stream is rebuilt from the term vector.
 *
 * @throws IOException if indexing, searching, or closing a resource fails
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
public void testOverlapWithOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  // try-with-resources guarantees the directory is closed even when opening or closing the
  // writer/reader fails; resources are released in reverse order of acquisition.
  try (Directory directory = newDirectory()) {
    // The writer is closed before the reader is opened so the document is visible to the reader.
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null))) {
      final Document document = new Document();
      final FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setStoreTermVectors(true);
      // no positions! only offsets are stored in the term vector
      customType.setStoreTermVectorOffsets(true);
      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
      indexWriter.addDocument(document);
    }
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      assertEquals(1, indexReader.numDocs());
      final IndexSearcher indexSearcher = newSearcher(indexReader);
      // slop 0 + in-order: "the" must be immediately followed by "fox"
      final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(FIELD, "the")),
          new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
      final TopDocs hits = indexSearcher.search(phraseQuery, 1);
      assertEquals(1, hits.totalHits);
      final Highlighter highlighter = new Highlighter(
          new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
      final TokenStream tokenStream =
          TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
      assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    }
  }
}
use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.
the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.
/**
 * Indexes a single document whose field is fed by {@code OverlappingTokenStream}, storing term
 * vectors with both positions and offsets, and checks that the exact span phrase
 * "the fox" is highlighted as one fragment when the token stream is rebuilt from the term vector.
 *
 * @throws IOException if indexing, searching, or closing a resource fails
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  // try-with-resources guarantees the directory is closed even when opening or closing the
  // writer/reader fails; resources are released in reverse order of acquisition.
  try (Directory directory = newDirectory()) {
    // The writer is closed before the reader is opened so the document is visible to the reader.
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null))) {
      final Document document = new Document();
      final FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setStoreTermVectors(true);
      customType.setStoreTermVectorPositions(true);
      customType.setStoreTermVectorOffsets(true);
      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
      indexWriter.addDocument(document);
    }
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      assertEquals(1, indexReader.numDocs());
      final IndexSearcher indexSearcher = newSearcher(indexReader);
      // slop 0 + in-order: "the" must be immediately followed by "fox"
      final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(FIELD, "the")),
          new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
      final TopDocs hits = indexSearcher.search(phraseQuery, 1);
      assertEquals(1, hits.totalHits);
      final Highlighter highlighter = new Highlighter(
          new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
      final TokenStream tokenStream =
          TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
      assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    }
  }
}
use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.
the class SpanNearClauseFactory method makeSpanClause.
/**
 * Combines the span queries held in {@code weightBySpanQuery} into one span clause.
 * A query whose stored weight is not exactly {@code 1f} is wrapped in a
 * {@code SpanBoostQuery} carrying that weight; the others are used as they are.
 *
 * @return the single collected query when the clause array has exactly one slot, otherwise a
 *         {@code SpanOrQuery} over all collected queries
 */
public SpanQuery makeSpanClause() {
  final SpanQuery[] clauses = new SpanQuery[size()];
  int next = 0;
  for (SpanQuery spanQuery : weightBySpanQuery.keySet()) {
    final float weight = weightBySpanQuery.get(spanQuery);
    clauses[next++] = (weight != 1f) ? new SpanBoostQuery(spanQuery, weight) : spanQuery;
  }
  if (clauses.length != 1) {
    return new SpanOrQuery(clauses);
  }
  // A lone clause needs no OR wrapper.
  return clauses[0];
}
use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.
the class PayloadSpanUtil method queryToSpanQuery.
/**
 * Translates {@code query} into span queries and hands each of them to {@code getPayloads}
 * together with {@code payloads}.
 * <p>
 * Boolean queries (non-prohibited clauses only) and disjunction-max queries are walked
 * recursively; phrase, term and multi-phrase queries are rebuilt from span term queries; span
 * queries are passed through unchanged. Any other query type is ignored.
 *
 * @param query the query to translate
 * @param payloads collection forwarded to {@code getPayloads}
 * @throws IOException propagated from {@code getPayloads}
 */
private void queryToSpanQuery(Query query, Collection<byte[]> payloads) throws IOException {
  if (query instanceof BooleanQuery) {
    for (BooleanClause clause : (BooleanQuery) query) {
      if (clause.isProhibited()) {
        continue;
      }
      queryToSpanQuery(clause.getQuery(), payloads);
    }
    return;
  }

  if (query instanceof PhraseQuery) {
    final PhraseQuery phrase = (PhraseQuery) query;
    final Term[] terms = phrase.getTerms();
    final SpanQuery[] termSpans = new SpanQuery[terms.length];
    for (int idx = 0; idx < terms.length; idx++) {
      termSpans[idx] = new SpanTermQuery(terms[idx]);
    }
    final int phraseSlop = phrase.getSlop();
    // Ordered matching is only requested for a slop of zero.
    getPayloads(payloads, new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0));
    return;
  }

  if (query instanceof TermQuery) {
    getPayloads(payloads, new SpanTermQuery(((TermQuery) query).getTerm()));
    return;
  }

  if (query instanceof SpanQuery) {
    getPayloads(payloads, (SpanQuery) query);
    return;
  }

  if (query instanceof DisjunctionMaxQuery) {
    final Iterator<Query> disjuncts = ((DisjunctionMaxQuery) query).iterator();
    while (disjuncts.hasNext()) {
      queryToSpanQuery(disjuncts.next(), payloads);
    }
    return;
  }

  if (!(query instanceof MultiPhraseQuery)) {
    // Unsupported query type: nothing to collect.
    return;
  }

  final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) query;
  final Term[][] termsPerSlot = multiPhrase.getTermArrays();
  final int[] positions = multiPhrase.getPositions();
  if (positions.length == 0) {
    return;
  }

  // The largest position decides how many per-position buckets are needed.
  int highestPosition = positions[0];
  for (final int candidate : positions) {
    highestPosition = Math.max(highestPosition, candidate);
  }

  @SuppressWarnings({ "rawtypes", "unchecked" })
  final List<Query>[] spansByPosition = new List[highestPosition + 1];
  int occupiedPositions = 0;
  for (int slot = 0; slot < termsPerSlot.length; ++slot) {
    final Term[] slotTerms = termsPerSlot[slot];
    final int at = positions[slot];
    if (spansByPosition[at] == null) {
      spansByPosition[at] = new ArrayList<>(slotTerms.length);
      ++occupiedPositions;
    }
    for (final Term term : slotTerms) {
      spansByPosition[at].add(new SpanTermQuery(term));
    }
  }

  // One OR clause per occupied position; empty positions are counted and added to the slop.
  final SpanQuery[] orClauses = new SpanQuery[occupiedPositions];
  int filled = 0;
  int gaps = 0;
  for (final List<Query> atPosition : spansByPosition) {
    if (atPosition == null) {
      ++gaps;
    } else {
      orClauses[filled++] = new SpanOrQuery(atPosition.toArray(new SpanQuery[atPosition.size()]));
    }
  }

  final int multiSlop = multiPhrase.getSlop();
  getPayloads(payloads, new SpanNearQuery(orClauses, multiSlop + gaps, multiSlop == 0));
}
use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterExtensibility method testUnifiedHighlighterExtensibility.
/**
 * This test is for maintaining the extensibility of the UnifiedHighlighter
 * for customizations out of package.
 * <p>
 * The anonymous subclass overrides each extension point and (except for
 * {@code getFieldHighlighter} and {@code getMaxLength}) simply delegates to {@code super}, so
 * the test breaks at compile time if one of those members stops being overridable or
 * accessible from this package.
 */
@Test
public void testUnifiedHighlighterExtensibility() {
  final int maxLength = 1000;
  UnifiedHighlighter uh = new UnifiedHighlighter(null, new MockAnalyzer(random())) {

    @Override
    protected Map<String, Object[]> highlightFieldsAsObjects(String[] fieldsIn, Query query, int[] docIdsIn, int[] maxPassagesIn) throws IOException {
      return super.highlightFieldsAsObjects(fieldsIn, query, docIdsIn, maxPassagesIn);
    }

    @Override
    protected OffsetSource getOffsetSource(String field) {
      return super.getOffsetSource(field);
    }

    @Override
    protected BreakIterator getBreakIterator(String field) {
      return super.getBreakIterator(field);
    }

    @Override
    protected PassageScorer getScorer(String field) {
      return super.getScorer(field);
    }

    @Override
    protected PassageFormatter getFormatter(String field) {
      return super.getFormatter(field);
    }

    @Override
    public Analyzer getIndexAnalyzer() {
      return super.getIndexAnalyzer();
    }

    @Override
    public IndexSearcher getIndexSearcher() {
      return super.getIndexSearcher();
    }

    @Override
    protected int getMaxNoHighlightPassages(String field) {
      return super.getMaxNoHighlightPassages(field);
    }

    @Override
    protected Boolean requiresRewrite(SpanQuery spanQuery) {
      return super.requiresRewrite(spanQuery);
    }

    @Override
    protected LimitedStoredFieldVisitor newLimitedStoredFieldsVisitor(String[] fields) {
      return super.newLimitedStoredFieldsVisitor(fields);
    }

    @Override
    protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
      return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
    }

    @Override
    protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
      // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
      BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
      Set<HighlightFlag> highlightFlags = getFlags(field);
      PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
      CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
      OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
      return new CustomFieldHighlighter(field,
          getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags),
          new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
          getScorer(field),
          maxPassages,
          getMaxNoHighlightPassages(field),
          getFormatter(field));
    }

    @Override
    protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, String field, BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Set<HighlightFlag> highlightFlags) {
      return super.getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags);
    }

    @Override
    public int getMaxLength() {
      return maxLength;
    }
  };
  // Expected value first, actual second, so a failure message reads the right way round.
  assertEquals(maxLength, uh.getMaxLength());
}
Aggregations