use of org.apache.lucene.search.spans.SpanOrQuery in project libresonic by Libresonic.
the class SearchService method getRandomSongs.
/**
* Returns a number of random songs.
*
* @param criteria Search criteria.
* @return List of random songs.
*/
public List<MediaFile> getRandomSongs(RandomSearchCriteria criteria) {
List<MediaFile> result = new ArrayList<MediaFile>();
IndexReader reader = null;
try {
reader = createIndexReader(SONG);
Searcher searcher = new IndexSearcher(reader);
BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term(FIELD_MEDIA_TYPE, MediaFile.MediaType.MUSIC.name().toLowerCase())), BooleanClause.Occur.MUST);
if (criteria.getGenre() != null) {
String genre = normalizeGenre(criteria.getGenre());
query.add(new TermQuery(new Term(FIELD_GENRE, genre)), BooleanClause.Occur.MUST);
}
if (criteria.getFromYear() != null || criteria.getToYear() != null) {
NumericRangeQuery<Integer> rangeQuery = NumericRangeQuery.newIntRange(FIELD_YEAR, criteria.getFromYear(), criteria.getToYear(), true, true);
query.add(rangeQuery, BooleanClause.Occur.MUST);
}
List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>();
for (MusicFolder musicFolder : criteria.getMusicFolders()) {
musicFolderQueries.add(new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath())));
}
query.add(new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])), BooleanClause.Occur.MUST);
TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE);
List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs);
Random random = new Random(System.currentTimeMillis());
while (!scoreDocs.isEmpty() && result.size() < criteria.getCount()) {
int index = random.nextInt(scoreDocs.size());
Document doc = searcher.doc(scoreDocs.remove(index).doc);
int id = Integer.valueOf(doc.get(FIELD_ID));
try {
addIfNotNull(mediaFileService.getMediaFile(id), result);
} catch (Exception x) {
LOG.warn("Failed to get media file " + id);
}
}
} catch (Throwable x) {
LOG.error("Failed to search or random songs.", x);
} finally {
FileUtil.closeQuietly(reader);
}
return result;
}
use of org.apache.lucene.search.spans.SpanOrQuery in project libresonic by Libresonic.
the class SearchService method getRandomAlbums.
/**
* Returns a number of random albums.
*
* @param count Number of albums to return.
* @param musicFolders Only return albums from these folders.
* @return List of random albums.
*/
public List<MediaFile> getRandomAlbums(int count, List<MusicFolder> musicFolders) {
List<MediaFile> result = new ArrayList<MediaFile>();
IndexReader reader = null;
try {
reader = createIndexReader(ALBUM);
Searcher searcher = new IndexSearcher(reader);
List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>();
for (MusicFolder musicFolder : musicFolders) {
musicFolderQueries.add(new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath())));
}
Query query = new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()]));
TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE);
List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs);
Random random = new Random(System.currentTimeMillis());
while (!scoreDocs.isEmpty() && result.size() < count) {
int index = random.nextInt(scoreDocs.size());
Document doc = searcher.doc(scoreDocs.remove(index).doc);
int id = Integer.valueOf(doc.get(FIELD_ID));
try {
addIfNotNull(mediaFileService.getMediaFile(id), result);
} catch (Exception x) {
LOG.warn("Failed to get media file " + id, x);
}
}
} catch (Throwable x) {
LOG.error("Failed to search for random albums.", x);
} finally {
FileUtil.closeQuietly(reader);
}
return result;
}
use of org.apache.lucene.search.spans.SpanOrQuery in project lucene-solr by apache.
the class TestSimilarity2 method testCrazySpans.
/** make sure all sims work with spanOR(termX, termY) where termY does not exist */
public void testCrazySpans() throws Exception {
// The problem: "normal" lucene queries create scorers, returning null if terms dont exist
// This means they never score a term that does not exist.
// however with spans, there is only one scorer for the whole hierarchy:
// inner queries are not real queries, their boosts are ignored, etc.
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
doc.add(newField("foo", "bar", ft));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher is = newSearcher(ir);
for (Similarity sim : sims) {
is.setSimilarity(sim);
SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar"));
SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz"));
Query query = new SpanOrQuery(s1, s2);
TopDocs td = is.search(query, 10);
assertEquals(1, td.totalHits);
float score = td.scoreDocs[0].score;
assertFalse("negative score for " + sim, score < 0.0f);
assertFalse("inf score for " + sim, Float.isInfinite(score));
assertFalse("nan score for " + sim, Float.isNaN(score));
}
ir.close();
dir.close();
}
use of org.apache.lucene.search.spans.SpanOrQuery in project lucene-solr by apache.
the class TestPayloadScoreQuery method testNestedNearQuery.
@Test
public void testNestedNearQuery() throws Exception {
// (one OR hundred) NEAR (twenty two) ~ 1
// 2 4 4 4
// one hundred twenty two
// two hundred twenty two
SpanNearQuery q = new SpanNearQuery(new SpanQuery[] { new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("field", "twenty")), new SpanTermQuery(new Term("field", "two")) }, 0, true) }, 1, true);
// check includeSpanScore makes a difference here
searcher.setSimilarity(new MultiplyingSimilarity());
try {
checkQuery(q, new MaxPayloadFunction(), new int[] { 122, 222 }, new float[] { 20.901256561279297f, 17.06580352783203f });
checkQuery(q, new MinPayloadFunction(), new int[] { 222, 122 }, new float[] { 17.06580352783203f, 10.450628280639648f });
checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[] { 19.15948486328125f, 17.06580352783203f });
checkQuery(q, new MaxPayloadFunction(), false, new int[] { 122, 222 }, new float[] { 4.0f, 4.0f });
checkQuery(q, new MinPayloadFunction(), false, new int[] { 222, 122 }, new float[] { 4.0f, 2.0f });
checkQuery(q, new AveragePayloadFunction(), false, new int[] { 222, 122 }, new float[] { 4.0f, 3.666666f });
} finally {
searcher.setSimilarity(similarity);
}
}
use of org.apache.lucene.search.spans.SpanOrQuery in project lucene-solr by apache.
the class MultiTermHighlighting method extractAutomata.
/**
* Extracts MultiTermQueries that match the provided field predicate.
* Returns equivalent automata that will match terms.
*/
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
// TODO Lucene needs a Query visitor API! LUCENE-3041
List<CharacterRunAutomaton> list = new ArrayList<>();
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof BoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanBoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
if (fieldMatcher.test(prefix.field())) {
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {
@Override
public String toString() {
return pq.toString();
}
});
}
} else if (query instanceof FuzzyQuery) {
final FuzzyQuery fq = (FuzzyQuery) query;
if (fieldMatcher.test(fq.getField())) {
String utf16 = fq.getTerm().text();
int[] termText = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
termText[j++] = cp = utf16.codePointAt(i);
}
int termLength = termText.length;
int prefixLength = Math.min(fq.getPrefixLength(), termLength);
String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
list.add(new CharacterRunAutomaton(automaton) {
@Override
public String toString() {
return fq.toString();
}
});
}
} else if (query instanceof TermRangeQuery) {
final TermRangeQuery tq = (TermRangeQuery) query;
if (fieldMatcher.test(tq.getField())) {
final CharsRef lowerBound;
if (tq.getLowerTerm() == null) {
lowerBound = null;
} else {
lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
}
final CharsRef upperBound;
if (tq.getUpperTerm() == null) {
upperBound = null;
} else {
upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
}
final boolean includeLower = tq.includesLower();
final boolean includeUpper = tq.includesUpper();
final CharsRef scratch = new CharsRef();
@SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
// this is *not* an automaton, but its very simple
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
@Override
public boolean run(char[] s, int offset, int length) {
scratch.chars = s;
scratch.offset = offset;
scratch.length = length;
if (lowerBound != null) {
int cmp = comparator.compare(scratch, lowerBound);
if (cmp < 0 || (!includeLower && cmp == 0)) {
return false;
}
}
if (upperBound != null) {
int cmp = comparator.compare(scratch, upperBound);
if (cmp > 0 || (!includeUpper && cmp == 0)) {
return false;
}
}
return true;
}
@Override
public String toString() {
return tq.toString();
}
});
}
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (fieldMatcher.test(aq.getField())) {
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return aq.toString();
}
});
}
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Aggregations