Use of org.apache.lucene.search.FieldDoc in project elasticsearch by elastic.
The class InternalTopHitsTests, method createTestInstance:
@Override
protected InternalTopHits createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
    int from = 0;
    int requestedSize = between(1, 40);
    int actualSize = between(0, requestedSize);
    float maxScore = Float.MIN_VALUE;
    ScoreDoc[] scoreDocs = new ScoreDoc[actualSize];
    SearchHit[] hits = new SearchHit[actualSize];
    Set<Integer> usedDocIds = new HashSet<>();
    for (int i = 0; i < actualSize; i++) {
        float score = randomFloat();
        maxScore = max(maxScore, score);
        int docId = randomValueOtherThanMany(usedDocIds::contains, () -> between(0, IndexWriter.MAX_DOCS));
        usedDocIds.add(docId);
        Map<String, SearchHitField> searchHitFields = new HashMap<>();
        if (testInstancesLookSortedByField) {
            // Field-sorted instances carry their per-field sort values in a FieldDoc.
            Object[] fields = new Object[testInstancesSortFields.length];
            for (int f = 0; f < testInstancesSortFields.length; f++) {
                fields[f] = randomOfType(testInstancesSortFields[f].getType());
            }
            scoreDocs[i] = new FieldDoc(docId, score, fields);
        } else {
            scoreDocs[i] = new ScoreDoc(docId, score);
        }
        hits[i] = new SearchHit(docId, Integer.toString(i), new Text("test"), searchHitFields);
        hits[i].score(score);
    }
    int totalHits = between(actualSize, 500000);
    SearchHits searchHits = new SearchHits(hits, totalHits, maxScore);
    TopDocs topDocs;
    // Sort the docs into the order the comparator defines before building TopDocs.
    Arrays.sort(scoreDocs, scoreDocComparator());
    if (testInstancesLookSortedByField) {
        topDocs = new TopFieldDocs(totalHits, scoreDocs, testInstancesSortFields, maxScore);
    } else {
        topDocs = new TopDocs(totalHits, scoreDocs, maxScore);
    }
    return new InternalTopHits(name, from, requestedSize, topDocs, searchHits, pipelineAggregators, metaData);
}
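For readers skimming this example: a FieldDoc is simply a ScoreDoc that also carries the values the hit sorted on, which is why the sorted branch above builds one. A minimal standalone sketch (not part of the test; the doc id, score, and values are made up) of constructing and reading one:

FieldDoc fieldDoc = new FieldDoc(7, 1.0f, new Object[] { 42L, new BytesRef("abc") });
// TopDocs exposes hits as ScoreDoc, so reading the sort values back needs a cast:
ScoreDoc hit = fieldDoc;
Object firstSortValue = ((FieldDoc) hit).fields[0]; // 42L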
Use of org.apache.lucene.search.FieldDoc in project elasticsearch by elastic.
The class InternalTopHitsTests, method assertReduced:
@Override
protected void assertReduced(InternalTopHits reduced, List<InternalTopHits> inputs) {
    SearchHits actualHits = reduced.getHits();
    List<Tuple<ScoreDoc, SearchHit>> allHits = new ArrayList<>();
    float maxScore = Float.MIN_VALUE;
    long totalHits = 0;
    for (int input = 0; input < inputs.size(); input++) {
        SearchHits internalHits = inputs.get(input).getHits();
        totalHits += internalHits.getTotalHits();
        maxScore = max(maxScore, internalHits.getMaxScore());
        for (int i = 0; i < internalHits.internalHits().length; i++) {
            ScoreDoc doc = inputs.get(input).getTopDocs().scoreDocs[i];
            // Re-create each doc with its shard index set to the input it came from.
            if (testInstancesLookSortedByField) {
                doc = new FieldDoc(doc.doc, doc.score, ((FieldDoc) doc).fields, input);
            } else {
                doc = new ScoreDoc(doc.doc, doc.score, input);
            }
            allHits.add(new Tuple<>(doc, internalHits.internalHits()[i]));
        }
    }
    // The expected result is the top `size` hits across all inputs, in comparator order.
    allHits.sort(comparing(Tuple::v1, scoreDocComparator()));
    SearchHit[] expectedHitsHits = new SearchHit[min(inputs.get(0).getSize(), allHits.size())];
    for (int i = 0; i < expectedHitsHits.length; i++) {
        expectedHitsHits[i] = allHits.get(i).v2();
    }
    SearchHits expectedHits = new SearchHits(expectedHitsHits, totalHits, maxScore);
    assertEqualsWithErrorMessageFromXContent(expectedHits, actualHits);
}
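The scoreDocComparator() helper used in both methods is defined elsewhere in InternalTopHitsTests and is not shown on this page. As a rough, hedged sketch of what the unsorted (score-based) case has to do, assuming the helper name and shape rather than quoting the real code:

// Hypothetical sketch: highest score first, ties broken by ascending doc id,
// mirroring Lucene's default relevance ordering. Not the actual test helper.
private static Comparator<ScoreDoc> scoreOnlyComparator() {
    return (a, b) -> {
        int cmp = Float.compare(b.score, a.score);
        return cmp != 0 ? cmp : Integer.compare(a.doc, b.doc);
    };
}

The field-sorted case would instead compare the FieldDoc.fields arrays using the comparators implied by testInstancesSortFields.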
Use of org.apache.lucene.search.FieldDoc in project lucene-solr by apache.
The class TestNumericDocValuesUpdates, method testBiasedMixOfRandomUpdates:
public void testBiasedMixOfRandomUpdates() throws Exception {
    // Three types of operations: addDocument, updateDocument, and updateNumericDocValue.
    // Rather than randomizing them equally, pick (random) cutoffs so each test run is biased,
    // i.e. some ops happen more often than others.
    final int ADD_CUTOFF = TestUtil.nextInt(random(), 1, 98);
    final int UPD_CUTOFF = TestUtil.nextInt(random(), ADD_CUTOFF + 1, 99);
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dir, conf);
    final int numOperations = atLeast(1000);
    final Map<Integer, Long> expected = new HashMap<>(numOperations / 3);
    // start with at least one doc before any chance of updates
    final int numSeedDocs = atLeast(1);
    for (int i = 0; i < numSeedDocs; i++) {
        final long val = random().nextLong();
        expected.put(i, val);
        writer.addDocument(doc(i, val));
    }
    int numDocUpdates = 0;
    int numValueUpdates = 0;
    for (int i = 0; i < numOperations; i++) {
        final int op = TestUtil.nextInt(random(), 1, 100);
        final long val = random().nextLong();
        if (op <= ADD_CUTOFF) {
            final int id = expected.size();
            //System.out.println("TEST i=" + i + ": addDocument id=" + id + " val=" + val);
            expected.put(id, val);
            writer.addDocument(doc(id, val));
        } else {
            final int id = TestUtil.nextInt(random(), 0, expected.size() - 1);
            expected.put(id, val);
            if (op <= UPD_CUTOFF) {
                numDocUpdates++;
                //System.out.println("TEST i=" + i + ": updateDocument id=" + id + " val=" + val);
                writer.updateDocument(new Term("id", "doc-" + id), doc(id, val));
            } else {
                numValueUpdates++;
                //System.out.println("TEST i=" + i + ": updateDV id=" + id + " val=" + val);
                writer.updateNumericDocValue(new Term("id", "doc-" + id), "val", val);
            }
        }
    }
    writer.commit();
    final DirectoryReader reader = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(reader);
    // TODO: make more efficient if max numOperations is going to be increased much
    for (Map.Entry<Integer, Long> expect : expected.entrySet()) {
        String id = "doc-" + expect.getKey();
        TopFieldDocs td = searcher.search(new TermQuery(new Term("id", id)), 1, new Sort(new SortField("val", SortField.Type.LONG)));
        assertEquals(id + " missing?", 1, td.totalHits);
        assertEquals(id + " value", expect.getValue(), ((FieldDoc) td.scoreDocs[0]).fields[0]);
    }
    IOUtils.close(reader, writer, dir);
}
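The doc(int, long) helper called throughout the test is defined elsewhere in TestNumericDocValuesUpdates. Given that the assertions query by Term("id", "doc-" + id) and sort on "val", it has to produce roughly the following; this is an assumed sketch, not the exact upstream helper:

// Assumed shape of the helper the test relies on:
private static Document doc(int id, long val) {
    Document document = new Document();
    // "id" must be an indexed term so updateDocument/TermQuery can find the doc ...
    document.add(new StringField("id", "doc-" + id, Field.Store.NO));
    // ... and "val" must be numeric doc values so updateNumericDocValue can rewrite it.
    document.add(new NumericDocValuesField("val", val));
    return document;
}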
Use of org.apache.lucene.search.FieldDoc in project lucene-solr by apache.
The class TestNumericDocValuesUpdates, method testMultipleUpdatesSameDoc:
public void testMultipleUpdatesSameDoc() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    // small number of docs, so use a tiny maxBufferedDocs
    conf.setMaxBufferedDocs(3);
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.updateDocument(new Term("id", "doc-1"), doc(1, 1000000000L));
    writer.updateNumericDocValue(new Term("id", "doc-1"), "val", 1000001111L);
    writer.updateDocument(new Term("id", "doc-2"), doc(2, 2000000000L));
    writer.updateDocument(new Term("id", "doc-2"), doc(2, 2222222222L));
    writer.updateNumericDocValue(new Term("id", "doc-1"), "val", 1111111111L);
    writer.commit();
    final DirectoryReader reader = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs td;
    td = searcher.search(new TermQuery(new Term("id", "doc-1")), 1, new Sort(new SortField("val", SortField.Type.LONG)));
    assertEquals("doc-1 missing?", 1, td.scoreDocs.length);
    assertEquals("doc-1 value", 1111111111L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    td = searcher.search(new TermQuery(new Term("id", "doc-2")), 1, new Sort(new SortField("val", SortField.Type.LONG)));
    assertEquals("doc-2 missing?", 1, td.scoreDocs.length);
    assertEquals("doc-2 value", 2222222222L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    IOUtils.close(reader, writer, dir);
}
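Because the search is sorted on a LONG field, the value asserted above comes back as a boxed Long inside FieldDoc.fields. Spelled out, the extraction the assertions rely on is:

FieldDoc topHit = (FieldDoc) td.scoreDocs[0]; // safe: a field-sorted search returns FieldDocs
long currentVal = (Long) topHit.fields[0];    // the sort value for the "val" field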
Use of org.apache.lucene.search.FieldDoc in project lucene-solr by apache.
The class TestFieldCacheSortRandom, method testRandomStringSort:
private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();
        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }
            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }
            if (VERBOSE) {
                System.out.println(" " + numDocs + ": s=" + s);
            }
            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));
        } else {
            br = null;
            if (VERBOSE) {
                System.out.println(" " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }
        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;
        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println(" reader=" + r);
    }
    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();
        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;
        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }
        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort, random.nextBoolean(), random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }
        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {
            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });
        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println(" expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }
        if (VERBOSE) {
            System.out.println(" actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }
            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }
            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
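The sortMissingLast branch above uses SortField.setMissingValue to control where documents without a value for "stringdv" land. A short standalone example of the two string options Lucene exposes (field name reused from the test for illustration):

// Missing values sort after every present value ...
SortField missingLast = new SortField("stringdv", SortField.Type.STRING);
missingLast.setMissingValue(SortField.STRING_LAST);

// ... or before every present value.
SortField missingFirst = new SortField("stringdv", SortField.Type.STRING);
missingFirst.setMissingValue(SortField.STRING_FIRST);

Sort sort = new Sort(missingLast, SortField.FIELD_DOC);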