Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class FuzzyLikeThisQueryTest, method testMultiWord:
//Tests that multiple input words have variants produced
public void testMultiWord() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("jonathin smoth", "name", 2, 1);
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
TopDocs topDocs = searcher.search(flt, 1);
ScoreDoc[] sd = topDocs.scoreDocs;
assertTrue("score docs must match 1 doc", (sd != null) && (sd.length > 0));
Document doc = searcher.doc(sd[0].doc);
assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
}
Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class FuzzyLikeThisQueryTest, method testClosestEditDistanceMatchComesFirst:
//Tests that idf ranking does not favour rare misspellings over a strong edit-distance match
public void testClosestEditDistanceMatchComesFirst() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("smith", "name", 2, 1);
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
assertTrue("Should have variant smythe", queryTerms.contains(new Term("name", "smythe")));
assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
assertTrue("Should have variant smyth", queryTerms.contains(new Term("name", "smyth")));
TopDocs topDocs = searcher.search(flt, 1);
ScoreDoc[] sd = topDocs.scoreDocs;
assertTrue("score docs must match 1 doc", (sd != null) && (sd.length > 0));
Document doc = searcher.doc(sd[0].doc);
assertEquals("Should match most similar not most rare variant", "2", doc.get("id"));
}
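Both tests follow the same flow: rewrite the FuzzyLikeThisQuery against the reader to inspect the variant terms it produced, then run the query and resolve the top ScoreDoc back to a stored document. A minimal sketch of that flow, reusing the analyzer and searcher fixtures from the tests (illustrative only, not taken verbatim from the test class):
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("smith", "name", 2, 1); // same arguments as the tests above
TopDocs topDocs = searcher.search(flt, 5);
for (ScoreDoc sd : topDocs.scoreDocs) {
    Document doc = searcher.doc(sd.doc); // ScoreDoc.doc is the Lucene docID of the hit
    System.out.println(doc.get("id") + " score=" + sd.score);
}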
Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class BaseDocValuesFormatTestCase, method testDocValuesSimple:
/*
* Simple test case to show how to use the API
*/
public void testDocValuesSimple() throws IOException {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < 5; i++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("docId", i));
doc.add(new TextField("docId", "" + i, Field.Store.NO));
writer.addDocument(doc);
}
writer.commit();
writer.forceMerge(1, true);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
assertEquals(1, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery.Builder query = new BooleanQuery.Builder();
query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);
TopDocs search = searcher.search(query.build(), 10);
assertEquals(5, search.totalHits);
ScoreDoc[] scoreDocs = search.scoreDocs;
NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
for (int i = 0; i < scoreDocs.length; i++) {
assertEquals(i, scoreDocs[i].doc);
assertEquals(i, docValues.advance(i));
assertEquals(i, docValues.longValue());
}
reader.close();
dir.close();
}
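The advance-and-check idiom above also works when the documents come from ScoreDoc hits rather than a sequential scan. A hedged sketch reusing the single-segment reader, searcher, and search results from this test (illustrative only; with a single segment the per-segment and global docIDs coincide):
ScoreDoc[] hits = search.scoreDocs.clone();
// doc values iterators only move forward, so visit the hits in increasing docID order
Arrays.sort(hits, (a, b) -> Integer.compare(a.doc, b.doc));
NumericDocValues dv = reader.leaves().get(0).reader().getNumericDocValues("docId");
for (ScoreDoc sd : hits) {
    if (dv.docID() < sd.doc) {
        dv.advance(sd.doc); // position the iterator at or beyond this hit
    }
    if (dv.docID() == sd.doc) {
        System.out.println("doc=" + sd.doc + " value=" + dv.longValue());
    }
}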
Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class ExpandComponent, method process:
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doExpand) {
return;
}
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String field = params.get(ExpandParams.EXPAND_FIELD);
String hint = null;
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
field = cp.getField();
hint = cp.hint;
}
}
}
}
if (field == null) {
throw new IOException("Expand field is null.");
}
String sortParam = params.get(ExpandParams.EXPAND_SORT);
String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
String qs = params.get(ExpandParams.EXPAND_Q);
int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
Sort sort = null;
if (sortParam != null) {
sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
}
Query query;
if (qs == null) {
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
}
}
List<Query> newFilters = new ArrayList<>();
if (fqs == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
newFilters.add(q);
}
}
}
} else {
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
SolrIndexSearcher searcher = req.getSearcher();
LeafReader reader = searcher.getSlowAtomicReader();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
SortedDocValues values = null;
long nullValue = 0L;
if (fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap<>();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
} else {
//Get the nullValue for the numeric collapse field
String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
final NumberType numType = fieldType.getNumberType();
// we don't need to handle invalid 64-bit field types here.
if (defaultValue != null) {
if (numType == NumberType.INTEGER) {
nullValue = Long.parseLong(defaultValue);
} else if (numType == NumberType.FLOAT) {
nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
}
} else if (NumberType.FLOAT.equals(numType)) {
// Integer case already handled by nullValue defaulting to 0
nullValue = Float.floatToIntBits(0.0f);
}
}
FixedBitSet groupBits = null;
LongHashSet groupSet = null;
DocList docList = rb.getResults().docList;
IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
//Gather the groups for the current page of documents
DocIterator idit = docList.iterator();
int[] globalDocs = new int[docList.size()];
int docsIndex = -1;
while (idit.hasNext()) {
globalDocs[++docsIndex] = idit.nextDoc();
}
Arrays.sort(globalDocs);
Query groupQuery = null;
/*
* This code gathers the group information for the current page.
*/
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
if (contexts.size() == 0) {
//When no context is available we can skip the expanding
return;
}
int currentContext = 0;
int currentDocBase = contexts.get(currentContext).docBase;
int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
IntObjectHashMap<BytesRef> ordBytes = null;
if (values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
if (values instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
int count = 0;
ordBytes = new IntObjectHashMap<>();
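//Walk the page's documents in global docID order, mapping each to its segment and recording the group ordinal, its BytesRef value, and the collapsed docID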
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
if (ordinalMap != null) {
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
}
int contextDoc = globalDoc - currentDocBase;
if (ordinalMap != null) {
if (contextDoc > currentValues.docID()) {
currentValues.advance(contextDoc);
}
if (contextDoc == currentValues.docID()) {
int ord = currentValues.ordValue();
++count;
BytesRef ref = currentValues.lookupOrd(ord);
ord = (int) segmentOrdinalMap.get(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
} else {
if (globalDoc > values.docID()) {
values.advance(globalDoc);
}
if (globalDoc == values.docID()) {
int ord = values.ordValue();
++count;
BytesRef ref = values.lookupOrd(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
}
}
if (count > 0 && count < 200) {
try {
groupQuery = getGroupQuery(field, count, ordBytes);
} catch (Exception e) {
throw new IOException(e);
}
}
} else {
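//Numeric collapse field: collect each page document's non-null group value into the LongHashSet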
groupSet = new LongHashSet(docList.size());
NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
int count = 0;
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
}
int contextDoc = globalDoc - currentDocBase;
int valueDocID = collapseValues.docID();
if (valueDocID < contextDoc) {
valueDocID = collapseValues.advance(contextDoc);
}
long value;
if (valueDocID == contextDoc) {
value = collapseValues.longValue();
} else {
value = 0;
}
if (value != nullValue) {
++count;
groupSet.add(value);
collapsedSet.add(globalDoc);
}
}
if (count > 0 && count < 200) {
if (fieldType.isPointField()) {
groupQuery = getPointGroupQuery(schemaField, count, groupSet);
} else {
groupQuery = getGroupQuery(field, fieldType, count, groupSet);
}
}
}
Collector collector;
if (sort != null)
sort = sort.rewrite(searcher);
Collector groupExpandCollector = null;
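//Choose the expand collector: ordinal-based for string collapse fields, value-based for numeric collapse fields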
if (values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap<>();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
} else {
groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
}
if (groupQuery != null) {
//Limits the results to documents that are in the same group as the documents in the page.
newFilters.add(groupQuery);
}
SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
if (pfilter.postFilter != null) {
pfilter.postFilter.setLastDelegate(groupExpandCollector);
collector = pfilter.postFilter;
} else {
collector = groupExpandCollector;
}
if (pfilter.filter == null) {
searcher.search(query, collector);
} else {
Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
searcher.search(q, collector);
}
LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
NamedList outMap = new SimpleOrderedMap();
CharsRefBuilder charsRef = new CharsRefBuilder();
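//Convert each group's TopDocs into a DocSlice keyed by the human-readable group value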
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
if (fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int) groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
}
rb.rsp.add("expanded", outMap);
}
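getGroupQuery, getPointGroupQuery, and numericToString are private helpers defined elsewhere in ExpandComponent. Conceptually, the string-field variant turns the collected group values into a disjunction over the collapse field; a simplified, hypothetical stand-in is sketched below (the real Solr implementation may build the query differently):
private Query buildGroupQuery(String field, IntObjectHashMap<BytesRef> ordBytes) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (IntObjectCursor<BytesRef> cursor : ordBytes) {
        // one SHOULD clause per group value collected from the current page
        builder.add(new TermQuery(new Term(field, cursor.value)), BooleanClause.Occur.SHOULD);
    }
    return builder.build();
}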
Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
The class AllGroupHeadsCollectorTest, method testRandom:
public void testRandom() throws Exception {
int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
for (int iter = 0; iter < numberOfRuns; iter++) {
if (VERBOSE) {
System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
}
final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
}
final List<BytesRef> groups = new ArrayList<>();
for (int i = 0; i < numGroups; i++) {
String randomValue;
do {
// Because of the DV-based impl we can't see the difference between an empty string and a null value.
// For that reason we don't generate empty string groups.
randomValue = TestUtil.randomRealisticUnicodeString(random());
//randomValue = TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
}
final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
if (VERBOSE) {
System.out.println("TEST: create fake content");
}
for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
final StringBuilder sb = new StringBuilder();
sb.append("real").append(random().nextInt(3)).append(' ');
final int fakeCount = random().nextInt(10);
for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
sb.append("fake ");
}
contentStrings[contentIDX] = sb.toString();
if (VERBOSE) {
System.out.println(" content=" + sb.toString());
}
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
DocValuesType valueType = DocValuesType.SORTED;
Document doc = new Document();
Document docNoGroup = new Document();
Field valuesField = new SortedDocValuesField("group", new BytesRef());
doc.add(valuesField);
Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
doc.add(sort1);
docNoGroup.add(sort1);
Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
doc.add(sort2);
docNoGroup.add(sort2);
Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
doc.add(sort3);
docNoGroup.add(sort3);
Field content = newTextField("content", "", Field.Store.NO);
doc.add(content);
docNoGroup.add(content);
NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
doc.add(idDV);
docNoGroup.add(idDV);
final GroupDoc[] groupDocs = new GroupDoc[numDocs];
for (int i = 0; i < numDocs; i++) {
final BytesRef groupValue;
if (random().nextInt(24) == 17) {
// So we test the "doc doesn't have the grouped field" case:
groupValue = null;
} else {
groupValue = groups.get(random().nextInt(groups.size()));
}
final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), new BytesRef(String.format(Locale.ROOT, "%05d", i)), contentStrings[random().nextInt(contentStrings.length)]);
if (VERBOSE) {
System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString() + " sort3=" + groupDoc.sort3.utf8ToString());
}
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
}
sort1.setBytesValue(groupDoc.sort1);
sort2.setBytesValue(groupDoc.sort2);
sort3.setBytesValue(groupDoc.sort3);
content.setStringValue(groupDoc.content);
idDV.setLongValue(groupDoc.id);
if (groupDoc.group == null) {
w.addDocument(docNoGroup);
} else {
w.addDocument(doc);
}
}
final DirectoryReader r = w.getReader();
w.close();
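// Build lookup tables between Lucene docIDs and the application-level id field stored in doc values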
NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
final int[] docIDToFieldId = new int[numDocs];
final int[] fieldIdToDocID = new int[numDocs];
for (int i = 0; i < numDocs; i++) {
assertEquals(i, values.nextDoc());
int fieldId = (int) values.longValue();
docIDToFieldId[i] = fieldId;
fieldIdToDocID[fieldId] = i;
}
final IndexSearcher s = newSearcher(r);
Set<Integer> seenIDs = new HashSet<>();
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
int idValue = docIDToFieldId[hit.doc];
final GroupDoc gd = groupDocs[idValue];
assertEquals(gd.id, idValue);
seenIDs.add(idValue);
assertTrue(gd.score == 0.0);
gd.score = hit.score;
}
}
// make sure all groups were seen across the hits
assertEquals(groupDocs.length, seenIDs.size());
// make sure scores are sane
for (GroupDoc gd : groupDocs) {
assertTrue(Float.isFinite(gd.score));
assertTrue(gd.score >= 0.0);
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
boolean sortByScoreOnly = random().nextBoolean();
Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
// The actual group heads contain Lucene docIDs. Need to change them into our id values.
for (int i = 0; i < actualGroupHeads.length; i++) {
actualGroupHeads[i] = docIDToFieldId[actualGroupHeads[i]];
}
// Allows us to easily iterate and assert the actual and expected results.
Arrays.sort(expectedGroupHeads);
Arrays.sort(actualGroupHeads);
if (VERBOSE) {
System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
System.out.println("Sort within group: " + sortWithinGroup);
System.out.println("Num group: " + numGroups);
System.out.println("Num doc: " + numDocs);
System.out.println("\n=== Expected: \n");
for (int expectedDocId : expectedGroupHeads) {
GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(), expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId));
}
System.out.println("\n=== Actual: \n");
for (int actualDocId : actualGroupHeads) {
GroupDoc actualGroupDoc = groupDocs[actualDocId];
String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(), actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId));
}
System.out.println("\n===================================================================================");
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
r.close();
dir.close();
}
}
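Beyond carrying a docID and a score, a ScoreDoc can also serve as a paging cursor. A minimal sketch, reusing the searcher s from the test above (illustrative only, not part of the test):
TopDocs firstPage = s.search(new TermQuery(new Term("content", "real0")), 10);
if (firstPage.scoreDocs.length > 0) {
    ScoreDoc last = firstPage.scoreDocs[firstPage.scoreDocs.length - 1];
    // the last ScoreDoc of one page becomes the "after" cursor for the next page
    TopDocs nextPage = s.searchAfter(last, new TermQuery(new Term("content", "real0")), 10);
}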