Use of org.apache.lucene.search.SimpleCollector in project lucene-solr by apache.
From the class TestDrillSideways, method testRandom.
public void testRandom() throws Exception {
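// aChance, bChance and cChance are double fields declared elsewhere on the test class;
// they default to 0.0, so each loop below re-draws until it gets a non-zero value.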
while (aChance == 0.0) {
aChance = random().nextDouble();
}
while (bChance == 0.0) {
bChance = random().nextDouble();
}
while (cChance == 0.0) {
cChance = random().nextDouble();
}
//aChance = .01;
//bChance = 0.5;
//cChance = 1.0;
double sum = aChance + bChance + cChance;
aChance /= sum;
bChance /= sum;
cChance /= sum;
int numDims = TestUtil.nextInt(random(), 2, 5);
//int numDims = 3;
int numDocs = atLeast(3000);
//int numDocs = 20;
if (VERBOSE) {
System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
}
String[][] dimValues = new String[numDims][];
int valueCount = 2;
for (int dim = 0; dim < numDims; dim++) {
Set<String> values = new HashSet<>();
while (values.size() < valueCount) {
String s = TestUtil.randomRealisticUnicodeString(random());
//String s = _TestUtil.randomString(random());
if (s.length() > 0) {
values.add(s);
}
}
dimValues[dim] = values.toArray(new String[values.size()]);
valueCount *= 2;
}
List<Doc> docs = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
Doc doc = new Doc();
doc.id = "" + i;
doc.contentToken = randomContentToken(false);
doc.dims = new int[numDims];
doc.dims2 = new int[numDims];
for (int dim = 0; dim < numDims; dim++) {
if (random().nextInt(5) == 3) {
// This doc is missing this dim:
doc.dims[dim] = -1;
} else if (dimValues[dim].length <= 4) {
int dimUpto = 0;
doc.dims[dim] = dimValues[dim].length - 1;
while (dimUpto < dimValues[dim].length) {
if (random().nextBoolean()) {
doc.dims[dim] = dimUpto;
break;
}
dimUpto++;
}
} else {
doc.dims[dim] = random().nextInt(dimValues[dim].length);
}
if (random().nextInt(5) == 3) {
// 2nd value:
doc.dims2[dim] = random().nextInt(dimValues[dim].length);
} else {
doc.dims2[dim] = -1;
}
}
docs.add(doc);
}
Directory d = newDirectory();
Directory td = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setInfoStream(InfoStream.NO_OUTPUT);
RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
for (int i = 0; i < numDims; i++) {
config.setMultiValued("dim" + i, true);
}
boolean doUseDV = random().nextBoolean();
for (Doc rawDoc : docs) {
Document doc = new Document();
doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
if (VERBOSE) {
System.out.println(" doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
}
for (int dim = 0; dim < numDims; dim++) {
int dimValue = rawDoc.dims[dim];
if (dimValue != -1) {
if (doUseDV) {
doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
} else {
doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
}
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
}
}
int dimValue2 = rawDoc.dims2[dim];
if (dimValue2 != -1) {
if (doUseDV) {
doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
} else {
doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
}
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
}
}
}
w.addDocument(config.build(tw, doc));
}
if (random().nextBoolean()) {
// Randomly delete a few docs:
int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
if (VERBOSE) {
System.out.println("delete " + numDel);
}
int delCount = 0;
while (delCount < numDel) {
Doc doc = docs.get(random().nextInt(docs.size()));
if (!doc.deleted) {
if (VERBOSE) {
System.out.println(" delete id=" + doc.id);
}
doc.deleted = true;
w.deleteDocuments(new Term("id", doc.id));
delCount++;
}
}
}
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: forceMerge(1)...");
}
w.forceMerge(1);
}
IndexReader r = w.getReader();
final SortedSetDocValuesReaderState sortedSetDVState;
IndexSearcher s = newSearcher(r);
if (doUseDV) {
sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
} else {
sortedSetDVState = null;
}
if (VERBOSE) {
System.out.println("r.numDocs() = " + r.numDocs());
}
// NRT open
TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
int numIters = atLeast(10);
for (int iter = 0; iter < numIters; iter++) {
String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
}
String[][] drillDowns = new String[numDims][];
int count = 0;
boolean anyMultiValuedDrillDowns = false;
while (count < numDrillDown) {
int dim = random().nextInt(numDims);
if (drillDowns[dim] == null) {
if (random().nextBoolean()) {
// Drill down on one value:
drillDowns[dim] = new String[] { dimValues[dim][random().nextInt(dimValues[dim].length)] };
} else {
int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
drillDowns[dim] = new String[orCount];
anyMultiValuedDrillDowns |= orCount > 1;
for (int i = 0; i < orCount; i++) {
while (true) {
String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
for (int j = 0; j < i; j++) {
if (value.equals(drillDowns[dim][j])) {
value = null;
break;
}
}
if (value != null) {
drillDowns[dim][i] = value;
break;
}
}
}
}
if (VERBOSE) {
BytesRef[] values = new BytesRef[drillDowns[dim].length];
for (int i = 0; i < values.length; i++) {
values[i] = new BytesRef(drillDowns[dim][i]);
}
System.out.println(" dim" + dim + "=" + Arrays.toString(values));
}
count++;
}
}
Query baseQuery;
if (contentToken == null) {
baseQuery = new MatchAllDocsQuery();
} else {
baseQuery = new TermQuery(new Term("content", contentToken));
}
DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
for (int dim = 0; dim < numDims; dim++) {
if (drillDowns[dim] != null) {
for (String value : drillDowns[dim]) {
ddq.add("dim" + dim, value);
}
}
}
Query filter;
if (random().nextInt(7) == 6) {
if (VERBOSE) {
System.out.println(" only-even filter");
}
filter = new Query() {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
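// Two-phase iteration: the approximation matches every doc; matches() below
// confirms each candidate by loading its stored id and keeping only even ones.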
return new ConstantScoreScorer(this, score(), new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
int docID = approximation.docID();
return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
}
@Override
public float matchCost() {
return 1000f;
}
});
}
};
}
@Override
public String toString(String field) {
return "drillSidewaysTestFilter";
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
} else {
filter = null;
}
// Verify docs are always collected in order. If we
// had an AssertingScorer it could catch it when
// Weight.scoresDocsOutOfOrder lies!:
getNewDrillSideways(s, config, tr).search(ddq, new SimpleCollector() {
int lastDocID;
@Override
public void collect(int doc) {
assert doc > lastDocID;
lastDocID = doc;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
lastDocID = -1;
}
@Override
public boolean needsScores() {
return false;
}
});
// Also separately verify that DS respects the scoreSubDocsAtOnce method, to ensure subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
// drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
getNewDrillSidewaysScoreSubdocsAtOnce(s, config, tr).search(ddq, new AssertingSubDocsAtOnceCollector());
}
TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
DrillSideways ds;
if (doUseDV) {
ds = getNewDrillSideways(s, config, sortedSetDVState);
} else {
ds = getNewDrillSidewaysBuildFacetsResult(s, config, tr);
}
// Retrieve all facets:
DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);
TopDocs hits = s.search(baseQuery, numDocs);
Map<String, Float> scores = new HashMap<>();
for (ScoreDoc sd : hits.scoreDocs) {
scores.put(s.doc(sd.doc).get("id"), sd.score);
}
if (VERBOSE) {
System.out.println(" verify all facets");
}
verifyEquals(dimValues, s, expected, actual, scores, doUseDV);
// Make sure drill down doesn't change score:
Query q = ddq;
if (filter != null) {
q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build();
}
TopDocs ddqHits = s.search(q, numDocs);
assertEquals(expected.hits.size(), ddqHits.totalHits);
for (int i = 0; i < expected.hits.size(); i++) {
// Score should be IDENTICAL:
assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
}
}
w.close();
IOUtils.close(r, tr, tw, d, td);
}
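The anonymous collector above is a reusable pattern: a SimpleCollector that asserts docIDs arrive in increasing order within each segment. A minimal standalone sketch of that pattern, assuming the same Lucene 6/7-era SimpleCollector API used in this test (class name hypothetical):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SimpleCollector;

// Minimal sketch: trips an assert if any segment delivers docIDs out of order.
class InOrderAssertingCollector extends SimpleCollector {

  private int lastDocID;

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    // docIDs are segment-relative, so the watermark resets per segment
    lastDocID = -1;
  }

  @Override
  public void collect(int doc) {
    assert doc > lastDocID : "out of order: doc=" + doc + " after lastDocID=" + lastDocID;
    lastDocID = doc;
  }

  @Override
  public boolean needsScores() {
    return false; // the ordering check never reads scores
  }
}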
Use of org.apache.lucene.search.SimpleCollector in project lucene-solr by apache.
From the class TestGeo3DPoint, method verify.
private static void verify(double[] lats, double[] lons) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
GeoPoint[] points = new GeoPoint[lats.length];
GeoPoint[] unquantizedPoints = new GeoPoint[lats.length];
// Pre-quantize all lat/lons:
for (int i = 0; i < lats.length; i++) {
if (Double.isNaN(lats[i]) == false) {
//System.out.println("lats[" + i + "] = " + lats[i]);
unquantizedPoints[i] = new GeoPoint(PlanetModel.WGS84, toRadians(lats[i]), toRadians(lons[i]));
points[i] = quantize(unquantizedPoints[i]);
}
}
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < points.length / 100) {
iwc.setMaxBufferedDocs(points.length / 100);
}
iwc.setCodec(getCodec());
Directory dir;
if (points.length > 100000) {
dir = newFSDirectory(createTempDir("TestBKDTree"));
} else {
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for (int id = 0; id < points.length; id++) {
Document doc = new Document();
doc.add(newStringField("id", "" + id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
GeoPoint point = points[id];
if (point != null) {
doc.add(new Geo3DPoint("point", point.x, point.y, point.z));
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", "" + idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.err.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w);
if (VERBOSE) {
System.out.println("TEST: using reader " + r);
}
w.close();
// We can't wrap with "exotic" readers because the geo3d query must see the Geo3DDVFormat:
IndexSearcher s = newSearcher(r, false);
final int iters = atLeast(100);
for (int iter = 0; iter < iters; iter++) {
/*
GeoShape shape = randomShape();
if (VERBOSE) {
System.err.println("\nTEST: iter=" + iter + " shape="+shape);
}
*/
// Geo3DPoint.newShapeQuery("point", shape);
Query query = random3DQuery("point");
if (VERBOSE) {
System.err.println(" using query: " + query);
}
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
if (VERBOSE) {
System.err.println(" hitCount: " + hits.cardinality());
}
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < r.maxDoc(); docID++) {
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
GeoPoint point = points[id];
GeoPoint unquantizedPoint = unquantizedPoints[id];
if (point != null && unquantizedPoint != null) {
GeoShape shape = ((PointInGeo3DShapeQuery) query).getShape();
XYZBounds bounds = new XYZBounds();
shape.getBounds(bounds);
XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
boolean expected = ((deleted.contains(id) == false) && shape.isWithin(point));
if (hits.get(docID) != expected) {
StringBuilder b = new StringBuilder();
if (expected) {
b.append("FAIL: id=" + id + " should have matched but did not\n");
} else {
b.append("FAIL: id=" + id + " should not have matched but did\n");
}
b.append(" shape=" + shape + "\n");
b.append(" bounds=" + bounds + "\n");
b.append(" world bounds=(" + " minX=" + PlanetModel.WGS84.getMinimumXValue() + " maxX=" + PlanetModel.WGS84.getMaximumXValue() + " minY=" + PlanetModel.WGS84.getMinimumYValue() + " maxY=" + PlanetModel.WGS84.getMaximumYValue() + " minZ=" + PlanetModel.WGS84.getMinimumZValue() + " maxZ=" + PlanetModel.WGS84.getMaximumZValue() + "\n");
b.append(" quantized point=" + point + " within shape? " + shape.isWithin(point) + " within bounds? " + solid.isWithin(point) + "\n");
b.append(" unquantized point=" + unquantizedPoint + " within shape? " + shape.isWithin(unquantizedPoint) + " within bounds? " + solid.isWithin(unquantizedPoint) + "\n");
b.append(" docID=" + docID + " deleted?=" + deleted.contains(id) + "\n");
b.append(" query=" + query + "\n");
b.append(" explanation:\n " + explain("point", shape, point, unquantizedPoint, r, docID).replace("\n", "\n "));
fail(b.toString());
}
} else {
assertFalse(hits.get(docID));
}
}
}
IOUtils.close(r, dir);
}
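The collector in verify is the standard way to materialize a query's hits as one top-level bitset: each segment's local docIDs are rebased by context.docBase. A condensed sketch of that pattern, assuming the same SimpleCollector API (method name hypothetical):

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;

// Minimal sketch: run a query and return its matches as a top-level bitset.
static FixedBitSet collectHits(IndexSearcher searcher, Query query) throws IOException {
  FixedBitSet hits = new FixedBitSet(searcher.getIndexReader().maxDoc());
  searcher.search(query, new SimpleCollector() {

    private int docBase;

    @Override
    protected void doSetNextReader(LeafReaderContext context) {
      docBase = context.docBase; // offset of this segment within the top-level reader
    }

    @Override
    public void collect(int doc) {
      hits.set(docBase + doc); // rebase the segment-local docID to global space
    }

    @Override
    public boolean needsScores() {
      return false;
    }
  });
  return hits;
}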
Use of org.apache.lucene.search.SimpleCollector in project lucene-solr by apache.
From the class TestJoinUtil, method createContext.
private IndexIterationContext createContext(int nDocs, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
if (globalOrdinalJoin) {
assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
}
Directory dir = newDirectory();
final Random random = random();
RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / RandomNumbers.randomIntBetween(random, 1, 4);
context.randomUniqueValues = new String[numRandomValues];
Set<String> trackSet = new HashSet<>();
context.randomFrom = new boolean[numRandomValues];
for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue;
do {
// The trick is to generate values that sort the same way as strings and as ints/longs; sticking to positive numbers makes this easier.
final int nextInt = random.nextInt(Integer.MAX_VALUE);
uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue, 16);
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Generated values must be unique, and empty strings aren't allowed.
trackSet.add(uniqueRandomValue);
context.randomFrom[i] = random.nextBoolean();
context.randomUniqueValues[i] = uniqueRandomValue;
}
List<String> randomUniqueValuesReplica = new ArrayList<>(Arrays.asList(context.randomUniqueValues));
RandomDoc[] docs = new RandomDoc[nDocs];
for (int i = 0; i < nDocs; i++) {
String id = Integer.toString(i);
int randomI = random.nextInt(context.randomUniqueValues.length);
String value = context.randomUniqueValues[randomI];
Document document = new Document();
document.add(newTextField(random, "id", id, Field.Store.YES));
document.add(newTextField(random, "value", value, Field.Store.NO));
boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? Math.min(2 + random.nextInt(10), context.randomUniqueValues.length) : 1;
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
if (globalOrdinalJoin) {
document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
}
final List<String> subValues;
{
int start = randomUniqueValuesReplica.size() == numberOfLinkValues ? 0 : random.nextInt(randomUniqueValuesReplica.size() - numberOfLinkValues);
subValues = randomUniqueValuesReplica.subList(start, start + numberOfLinkValues);
Collections.shuffle(subValues, random);
}
for (String linkValue : subValues) {
assert !docs[i].linkValues.contains(linkValue);
docs[i].linkValues.add(linkValue);
if (from) {
if (!context.fromDocuments.containsKey(linkValue)) {
context.fromDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueFromDocs.containsKey(value)) {
context.randomValueFromDocs.put(value, new ArrayList<>());
}
context.fromDocuments.get(linkValue).add(docs[i]);
context.randomValueFromDocs.get(value).add(docs[i]);
addLinkFields(random, document, "from", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
} else {
if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueToDocs.containsKey(value)) {
context.randomValueToDocs.put(value, new ArrayList<>());
}
context.toDocuments.get(linkValue).add(docs[i]);
context.randomValueToDocs.get(value).add(docs[i]);
addLinkFields(random, document, "to", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
}
}
w.addDocument(document);
if (random.nextInt(10) == 4) {
w.commit();
}
if (VERBOSE) {
System.out.println("Added document[" + docs[i].id + "]: " + document);
}
}
if (random.nextBoolean()) {
w.forceMerge(1);
}
w.close();
// Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
// any ScoreMode.
DirectoryReader topLevelReader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(topLevelReader);
for (int i = 0; i < context.randomUniqueValues.length; i++) {
String uniqueRandomValue = context.randomUniqueValues[i];
final String fromField;
final String toField;
final Map<String, Map<Integer, JoinScore>> queryVals;
if (context.randomFrom[i]) {
fromField = "from";
toField = "to";
queryVals = context.fromHitsToJoinScore;
} else {
fromField = "to";
toField = "from";
queryVals = context.toHitsToJoinScore;
}
final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<>();
if (multipleValuesPerDocument) {
searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {
private Scorer scorer;
private SortedSetDocValues docTermOrds;
@Override
public void collect(int doc) throws IOException {
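// Iterator-style doc values: advance docTermOrds to this doc, then read ords
// only if the iterator landed exactly here (i.e. the doc actually has values).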
if (doc > docTermOrds.docID()) {
docTermOrds.advance(doc);
}
if (doc == docTermOrds.docID()) {
long ord;
while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
final BytesRef joinValue = docTermOrds.lookupOrd(ord);
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
joinScore.addScore(scorer.score());
}
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docTermOrds = DocValues.getSortedSet(context.reader(), fromField);
}
@Override
public void setScorer(Scorer scorer) {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return true;
}
});
} else {
searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {
private Scorer scorer;
private BinaryDocValues terms;
@Override
public void collect(int doc) throws IOException {
if (doc > terms.docID()) {
terms.advance(doc);
}
final BytesRef joinValue;
if (doc == terms.docID()) {
joinValue = terms.binaryValue();
} else {
// missing;
return;
}
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
if (VERBOSE) {
System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
}
joinScore.addScore(scorer.score());
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
terms = DocValues.getBinary(context.reader(), fromField);
}
@Override
public void setScorer(Scorer scorer) {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return true;
}
});
}
final Map<Integer, JoinScore> docToJoinScore = new HashMap<>();
if (multipleValuesPerDocument) {
Terms terms = MultiFields.getTerms(topLevelReader, toField);
if (terms != null) {
PostingsEnum postingsEnum = null;
SortedSet<BytesRef> joinValues = new TreeSet<>();
joinValues.addAll(joinValueToJoinScores.keySet());
for (BytesRef joinValue : joinValues) {
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(joinValue)) {
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
// The first encountered join value determines the score. Something to keep in mind for many-to-many relations.
if (!docToJoinScore.containsKey(doc)) {
docToJoinScore.put(doc, joinScore);
}
}
}
}
}
} else {
searcher.search(new MatchAllDocsQuery(), new SimpleCollector() {
private BinaryDocValues terms;
private int docBase;
@Override
public void collect(int doc) throws IOException {
if (doc > terms.docID()) {
terms.advance(doc);
}
final BytesRef joinValue;
if (doc == terms.docID()) {
joinValue = terms.binaryValue();
} else {
// missing;
joinValue = new BytesRef(BytesRef.EMPTY_BYTES);
}
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
return;
}
docToJoinScore.put(docBase + doc, joinScore);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
terms = DocValues.getBinary(context.reader(), toField);
docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) {
}
@Override
public boolean needsScores() {
return false;
}
});
}
queryVals.put(uniqueRandomValue, docToJoinScore);
}
if (globalOrdinalJoin) {
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
for (LeafReaderContext leafContext : topLevelReader.leaves()) {
values[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
}
context.ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
}
context.searcher = searcher;
context.dir = dir;
return context;
}
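The inline collectors above all rely on the same iterator-style doc values idiom: the values iterator only moves forward, and a document may have no value at all. A condensed sketch of that idiom, here summing scores per join value from a binary doc values field; the field name "join_value" and the class name are hypothetical:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BytesRef;

// Minimal sketch: accumulate per-join-value score sums from binary doc values.
class JoinValueScoreCollector extends SimpleCollector {

  final Map<BytesRef, Float> scoreSums = new HashMap<>();
  private BinaryDocValues values;
  private Scorer scorer;

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    values = DocValues.getBinary(context.reader(), "join_value"); // hypothetical field
  }

  @Override
  public void setScorer(Scorer scorer) {
    this.scorer = scorer;
  }

  @Override
  public void collect(int doc) throws IOException {
    if (values.docID() < doc) {
      values.advance(doc); // doc values iterators only move forward
    }
    if (values.docID() == doc) { // the doc actually has a value
      BytesRef key = BytesRef.deepCopyOf(values.binaryValue());
      scoreSums.merge(key, scorer.score(), Float::sum);
    }
  }

  @Override
  public boolean needsScores() {
    return true; // collect() reads scorer.score()
  }
}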
Use of org.apache.lucene.search.SimpleCollector in project lucene-solr by apache.
From the class TestJoinUtil, method testInsideBooleanQuery.
/** LUCENE-5487: verify a join query inside a SHOULD BQ
* will still use the join query's optimized BulkScorers */
public void testInsideBooleanQuery() throws Exception {
final String idField = "id";
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "7", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("7")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new TextField(idField, "0", Field.Store.NO));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
w.forceMerge(1);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for product
Query joinQuery = JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(joinQuery, BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
indexSearcher.search(bq.build(), new SimpleCollector() {
boolean sawFive;
@Override
public void collect(int docID) {
// The join query's optimized BulkScorer walks hits term by term rather than in docID order,
// so doc 5 (joined via "0") is collected before doc 1 (joined via "7") only on that bulk path:
if (docID == 5) {
sawFive = true;
} else if (docID == 1) {
assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
}
}
@Override
public boolean needsScores() {
return false;
}
});
indexSearcher.getIndexReader().close();
dir.close();
}
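The assertion works because a collector observes the exact order of collect calls. A minimal sketch of a collector that records that order for a test to inspect afterwards (class name hypothetical):

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.SimpleCollector;

// Minimal sketch: record collection order so a test can detect whether an
// optimized bulk scorer delivered hits out of docID order.
class CollectionOrderRecorder extends SimpleCollector {

  final List<Integer> order = new ArrayList<>();

  @Override
  public void collect(int doc) {
    order.add(doc); // segment-local docID; sufficient for a single-segment test
  }

  @Override
  public boolean needsScores() {
    return false;
  }
}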
Use of org.apache.lucene.search.SimpleCollector in project lucene-solr by apache.
From the class JoinUtil, method createJoinQuery.
/**
* Method for query time joining for numeric fields. It supports multi- and single-valued ints, longs, floats and doubles.
* All considerations from {@link JoinUtil#createJoinQuery(String, boolean, String, Query, IndexSearcher, ScoreMode)} are applicable here too,
* though memory consumption might be higher.
* <p>
*
* @param fromField The from field to join from
* @param multipleValuesPerDocument Whether the from field has multiple terms per document
* when true fromField might be {@link DocValuesType#SORTED_NUMERIC},
* otherwise fromField should be {@link DocValuesType#NUMERIC}
* @param toField The to field to join to, should be {@link IntPoint}, {@link LongPoint}, {@link FloatPoint}
* or {@link DoublePoint}.
* @param numericType either {@link java.lang.Integer}, {@link java.lang.Long}, {@link java.lang.Float}
* or {@link java.lang.Double}; it should correspond to the type of the toField
* @param fromQuery The query to match documents on the from side
* @param fromSearcher The searcher that executed the specified fromQuery
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @return a {@link Query} instance that can be used to join documents based on the
* terms in the from and to field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String fromField, boolean multipleValuesPerDocument, String toField, Class<? extends Number> numericType, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) throws IOException {
TreeSet<Long> joinValues = new TreeSet<>();
Map<Long, Float> aggregatedScores = new HashMap<>();
Map<Long, Integer> occurrences = new HashMap<>();
boolean needsScore = scoreMode != ScoreMode.None;
BiConsumer<Long, Float> scoreAggregator;
if (scoreMode == ScoreMode.Max) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, Math.max(currentValue, score));
}
};
} else if (scoreMode == ScoreMode.Min) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, Math.min(currentValue, score));
}
};
} else if (scoreMode == ScoreMode.Total) {
scoreAggregator = (key, score) -> {
Float currentValue = aggregatedScores.putIfAbsent(key, score);
if (currentValue != null) {
aggregatedScores.put(key, currentValue + score);
}
};
} else if (scoreMode == ScoreMode.Avg) {
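// For Avg, accumulate the running sum here and track per-key counts in
// occurrences; the joinScorer defined below divides sum by count.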
scoreAggregator = (key, score) -> {
Float currentScore = aggregatedScores.putIfAbsent(key, score);
if (currentScore != null) {
aggregatedScores.put(key, currentScore + score);
}
Integer currentOccurrence = occurrences.putIfAbsent(key, 1);
if (currentOccurrence != null) {
occurrences.put(key, ++currentOccurrence);
}
};
} else {
scoreAggregator = (key, score) -> {
throw new UnsupportedOperationException();
};
}
LongFunction<Float> joinScorer;
if (scoreMode == ScoreMode.Avg) {
joinScorer = (joinValue) -> {
Float aggregatedScore = aggregatedScores.get(joinValue);
Integer occurrence = occurrences.get(joinValue);
return aggregatedScore / occurrence;
};
} else {
joinScorer = aggregatedScores::get;
}
Collector collector;
if (multipleValuesPerDocument) {
collector = new SimpleCollector() {
SortedNumericDocValues sortedNumericDocValues;
Scorer scorer;
@Override
public void collect(int doc) throws IOException {
if (doc > sortedNumericDocValues.docID()) {
sortedNumericDocValues.advance(doc);
}
if (doc == sortedNumericDocValues.docID()) {
for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
long value = sortedNumericDocValues.nextValue();
joinValues.add(value);
if (needsScore) {
scoreAggregator.accept(value, scorer.score());
}
}
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
sortedNumericDocValues = DocValues.getSortedNumeric(context.reader(), fromField);
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return needsScore;
}
};
} else {
collector = new SimpleCollector() {
NumericDocValues numericDocValues;
Scorer scorer;
private int lastDocID = -1;
private boolean docsInOrder(int docID) {
if (docID < lastDocID) {
throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
}
lastDocID = docID;
return true;
}
@Override
public void collect(int doc) throws IOException {
assert docsInOrder(doc);
int dvDocID = numericDocValues.docID();
if (dvDocID < doc) {
dvDocID = numericDocValues.advance(doc);
}
long value;
if (dvDocID == doc) {
value = numericDocValues.longValue();
} else {
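// This doc has no value for fromField; 0 is used as the fallback join value.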
value = 0;
}
joinValues.add(value);
if (needsScore) {
scoreAggregator.accept(value, scorer.score());
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
numericDocValues = DocValues.getNumeric(context.reader(), fromField);
lastDocID = -1;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean needsScores() {
return needsScore;
}
};
}
fromSearcher.search(fromQuery, collector);
Iterator<Long> iterator = joinValues.iterator();
final int bytesPerDim;
final BytesRef encoded = new BytesRef();
final PointInSetIncludingScoreQuery.Stream stream;
if (Integer.class.equals(numericType)) {
bytesPerDim = Integer.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
IntPoint.encodeDimension((int) value, encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Long.class.equals(numericType)) {
bytesPerDim = Long.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
LongPoint.encodeDimension(value, encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Float.class.equals(numericType)) {
bytesPerDim = Float.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
FloatPoint.encodeDimension(Float.intBitsToFloat((int) value), encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else if (Double.class.equals(numericType)) {
bytesPerDim = Double.BYTES;
stream = new PointInSetIncludingScoreQuery.Stream() {
@Override
public BytesRef next() {
if (iterator.hasNext()) {
long value = iterator.next();
DoublePoint.encodeDimension(Double.longBitsToDouble(value), encoded.bytes, 0);
if (needsScore) {
score = joinScorer.apply(value);
}
return encoded;
} else {
return null;
}
}
};
} else {
throw new IllegalArgumentException("unsupported numeric type, only Integer, Long, Float and Double are supported");
}
encoded.bytes = new byte[bytesPerDim];
encoded.length = bytesPerDim;
if (needsScore) {
return new PointInSetIncludingScoreQuery(scoreMode, fromQuery, multipleValuesPerDocument, toField, bytesPerDim, stream) {
@Override
protected String toString(byte[] value) {
return toString.apply(value, numericType);
}
};
} else {
return new PointInSetQuery(toField, 1, bytesPerDim, stream) {
@Override
protected String toString(byte[] value) {
return PointInSetIncludingScoreQuery.toString.apply(value, numericType);
}
};
}
}
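A hypothetical call site for this numeric variant; the field names, the fromQuery, and the searcher variable are illustrative, not part of the source above:

// Assumes "from" docs carry a NUMERIC doc values field "productId" and the
// "to" side indexes the same values as a LongPoint field "id".
Query joinQuery = JoinUtil.createJoinQuery(
    "productId",                                  // fromField
    false,                                        // one join value per document
    "id",                                         // toField, indexed as LongPoint
    Long.class,                                   // numericType matching the LongPoint encoding
    new TermQuery(new Term("category", "books")), // fromQuery
    searcher,                                     // IndexSearcher over the "from" index
    ScoreMode.Max);                               // keep the max from-side score per join value
TopDocs hits = searcher.search(joinQuery, 10);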