Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
Class AllGroupHeadsCollectorTest, method testRandom:
public void testRandom() throws Exception {
int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
for (int iter = 0; iter < numberOfRuns; iter++) {
if (VERBOSE) {
System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
}
final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
}
final List<BytesRef> groups = new ArrayList<>();
for (int i = 0; i < numGroups; i++) {
String randomValue;
do {
// Because of the DV-based impl we can't tell the difference between an empty string and a null value.
// For that reason we don't generate empty-string groups.
randomValue = TestUtil.randomRealisticUnicodeString(random());
//randomValue = TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
}
final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
if (VERBOSE) {
System.out.println("TEST: create fake content");
}
for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
final StringBuilder sb = new StringBuilder();
sb.append("real").append(random().nextInt(3)).append(' ');
final int fakeCount = random().nextInt(10);
for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
sb.append("fake ");
}
contentStrings[contentIDX] = sb.toString();
if (VERBOSE) {
System.out.println(" content=" + sb.toString());
}
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
DocValuesType valueType = DocValuesType.SORTED;
Document doc = new Document();
Document docNoGroup = new Document();
Field valuesField = new SortedDocValuesField("group", new BytesRef());
doc.add(valuesField);
Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
doc.add(sort1);
docNoGroup.add(sort1);
Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
doc.add(sort2);
docNoGroup.add(sort2);
Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
doc.add(sort3);
docNoGroup.add(sort3);
Field content = newTextField("content", "", Field.Store.NO);
doc.add(content);
docNoGroup.add(content);
NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
doc.add(idDV);
docNoGroup.add(idDV);
final GroupDoc[] groupDocs = new GroupDoc[numDocs];
for (int i = 0; i < numDocs; i++) {
final BytesRef groupValue;
if (random().nextInt(24) == 17) {
// So we test the "doc doesn't have the group'd
// field" case:
groupValue = null;
} else {
groupValue = groups.get(random().nextInt(groups.size()));
}
final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), new BytesRef(String.format(Locale.ROOT, "%05d", i)), contentStrings[random().nextInt(contentStrings.length)]);
if (VERBOSE) {
System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString() + " sort3=" + groupDoc.sort3.utf8ToString());
}
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
}
sort1.setBytesValue(groupDoc.sort1);
sort2.setBytesValue(groupDoc.sort2);
sort3.setBytesValue(groupDoc.sort3);
content.setStringValue(groupDoc.content);
idDV.setLongValue(groupDoc.id);
if (groupDoc.group == null) {
w.addDocument(docNoGroup);
} else {
w.addDocument(doc);
}
}
final DirectoryReader r = w.getReader();
w.close();
NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
final int[] docIDToFieldId = new int[numDocs];
final int[] fieldIdToDocID = new int[numDocs];
for (int i = 0; i < numDocs; i++) {
assertEquals(i, values.nextDoc());
int fieldId = (int) values.longValue();
docIDToFieldId[i] = fieldId;
fieldIdToDocID[fieldId] = i;
}
final IndexSearcher s = newSearcher(r);
Set<Integer> seenIDs = new HashSet<>();
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
int idValue = docIDToFieldId[hit.doc];
final GroupDoc gd = groupDocs[idValue];
assertEquals(gd.id, idValue);
seenIDs.add(idValue);
assertTrue(gd.score == 0.0);
gd.score = hit.score;
}
}
// make sure every doc was seen across the hits
assertEquals(groupDocs.length, seenIDs.size());
// make sure scores are sane
for (GroupDoc gd : groupDocs) {
assertTrue(Float.isFinite(gd.score));
assertTrue(gd.score >= 0.0);
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
boolean sortByScoreOnly = random().nextBoolean();
Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
// The actual group heads contain Lucene doc IDs; convert them to our id values.
for (int i = 0; i < actualGroupHeads.length; i++) {
actualGroupHeads[i] = docIDToFieldId[actualGroupHeads[i]];
}
// Allows us to easily iterate over and assert the actual and expected results.
Arrays.sort(expectedGroupHeads);
Arrays.sort(actualGroupHeads);
if (VERBOSE) {
System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
System.out.println("Sort within group: " + sortWithinGroup);
System.out.println("Num group: " + numGroups);
System.out.println("Num doc: " + numDocs);
System.out.println("\n=== Expected: \n");
for (int expectedDocId : expectedGroupHeads) {
GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(), expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId));
}
System.out.println("\n=== Actual: \n");
for (int actualDocId : actualGroupHeads) {
GroupDoc actualGroupDoc = groupDocs[actualDocId];
String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(), actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId));
}
System.out.println("\n===================================================================================");
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
r.close();
dir.close();
}
}
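The pattern worth noting in the test above is that a single SortedDocValuesField instance (and the sort/id fields) is created once and reused across documents via setBytesValue/setLongValue. The following is a minimal sketch of that reuse pattern outside the test framework; the plain IndexWriter, RAMDirectory, and StandardAnalyzer, as well as the illustrative values, are assumptions (the test itself uses RandomIndexWriter and MockAnalyzer).

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class ReusedDocValuesFieldsSketch {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // Create the Field instances once and add them to a single reusable Document ...
      SortedDocValuesField group = new SortedDocValuesField("group", new BytesRef());
      NumericDocValuesField id = new NumericDocValuesField("id", 0);
      doc.add(group);
      doc.add(id);
      String[] groups = { "groupA", "groupB", "groupA" };
      for (int i = 0; i < groups.length; i++) {
        // ... then only mutate the values between addDocument calls, as the test does.
        group.setBytesValue(new BytesRef(groups[i]));
        id.setLongValue(i);
        writer.addDocument(doc);
      }
      writer.commit();
    }
  }
}

Reusing the Field instances avoids per-document allocations, which is why the test mutates the same objects inside its indexing loop.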
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
Class GroupingSearchTest, method testSetAllGroups:
public void testSetAllGroups() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED));
doc.add(new SortedDocValuesField("group", new BytesRef("foo")));
w.addDocument(doc);
IndexSearcher indexSearcher = newSearcher(w.getReader());
w.close();
GroupingSearch gs = new GroupingSearch("group");
gs.setAllGroups(true);
TopGroups<?> groups = gs.search(indexSearcher, new TermQuery(new Term("group", "foo")), 0, 10);
assertEquals(1, groups.totalHitCount);
//assertEquals(1, groups.totalGroupCount.intValue());
assertEquals(1, groups.totalGroupedHitCount);
assertEquals(1, gs.getAllMatchingGroups().size());
indexSearcher.getIndexReader().close();
dir.close();
}
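Read on its own, the test above is close to a minimal grouping recipe: the group key is indexed twice, once as a StringField for term matching and once as a SortedDocValuesField for GroupingSearch to group on. Below is a hedged, self-contained sketch of that recipe without the test harness; the "color" field, the "red"/"blue" values, the MatchAllDocsQuery, and the RAMDirectory/StandardAnalyzer setup are illustrative assumptions.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.grouping.GroupingSearch;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class GroupingSearchSketch {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (String color : new String[] { "red", "blue", "red" }) {
        Document doc = new Document();
        doc.add(new StringField("color", color, Field.Store.NO));         // for term queries
        doc.add(new SortedDocValuesField("color", new BytesRef(color)));  // for grouping
        writer.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        GroupingSearch gs = new GroupingSearch("color"); // groups on the doc values field
        gs.setAllGroups(true);
        TopGroups<?> groups = gs.search(searcher, new MatchAllDocsQuery(), 0, 10);
        System.out.println("grouped hits: " + groups.totalGroupedHitCount
            + ", distinct groups: " + gs.getAllMatchingGroups().size());
      }
    }
  }
}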
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
Class TestBlockJoin, method testRandom:
public void testRandom() throws Exception {
// We build two indices at once: one normalized (which
// ToParentBlockJoinQuery/Collector,
// ToChildBlockJoinQuery can query) and the other w/
// the same docs, just fully denormalized:
final Directory dir = newDirectory();
final Directory joinDir = newDirectory();
final int maxNumChildrenPerParent = 20;
final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
//final int numParentDocs = 30;
// Values for parent fields:
final String[][] parentFields = getRandomFields(numParentDocs / 2);
// Values for child fields:
final String[][] childFields = getRandomFields(numParentDocs);
final boolean doDeletes = random().nextBoolean();
final List<Integer> toDelete = new ArrayList<>();
// TODO: parallel star join, nested join cases too!
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final RandomIndexWriter joinW = new RandomIndexWriter(random(), joinDir);
for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++) {
Document parentDoc = new Document();
Document parentJoinDoc = new Document();
Field id = new StoredField("parentID", parentDocID);
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
id = new NumericDocValuesField("parentID", parentDocID);
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
for (int field = 0; field < parentFields.length; field++) {
if (random().nextDouble() < 0.9) {
String s = parentFields[field][random().nextInt(parentFields[field].length)];
Field f = newStringField("parent" + field, s, Field.Store.NO);
parentDoc.add(f);
parentJoinDoc.add(f);
f = new SortedDocValuesField("parent" + field, new BytesRef(s));
parentDoc.add(f);
parentJoinDoc.add(f);
}
}
if (doDeletes) {
parentDoc.add(new IntPoint("blockID", parentDocID));
parentJoinDoc.add(new IntPoint("blockID", parentDocID));
}
final List<Document> joinDocs = new ArrayList<>();
if (VERBOSE) {
StringBuilder sb = new StringBuilder();
sb.append("parentID=").append(parentDoc.get("parentID"));
for (int fieldID = 0; fieldID < parentFields.length; fieldID++) {
String s = parentDoc.get("parent" + fieldID);
if (s != null) {
sb.append(" parent" + fieldID + "=" + s);
}
}
System.out.println(" " + sb.toString());
}
final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
for (int childDocID = 0; childDocID < numChildDocs; childDocID++) {
// Denormalize: copy all parent fields into child doc:
Document childDoc = TestUtil.cloneDocument(parentDoc);
Document joinChildDoc = new Document();
joinDocs.add(joinChildDoc);
Field childID = new StoredField("childID", childDocID);
childDoc.add(childID);
joinChildDoc.add(childID);
childID = new NumericDocValuesField("childID", childDocID);
childDoc.add(childID);
joinChildDoc.add(childID);
for (int childFieldID = 0; childFieldID < childFields.length; childFieldID++) {
if (random().nextDouble() < 0.9) {
String s = childFields[childFieldID][random().nextInt(childFields[childFieldID].length)];
Field f = newStringField("child" + childFieldID, s, Field.Store.NO);
childDoc.add(f);
joinChildDoc.add(f);
f = new SortedDocValuesField("child" + childFieldID, new BytesRef(s));
childDoc.add(f);
joinChildDoc.add(f);
}
}
if (VERBOSE) {
StringBuilder sb = new StringBuilder();
sb.append("childID=").append(joinChildDoc.get("childID"));
for (int fieldID = 0; fieldID < childFields.length; fieldID++) {
String s = joinChildDoc.get("child" + fieldID);
if (s != null) {
sb.append(" child" + fieldID + "=" + s);
}
}
System.out.println(" " + sb.toString());
}
if (doDeletes) {
joinChildDoc.add(new IntPoint("blockID", parentDocID));
}
w.addDocument(childDoc);
}
// Parent last:
joinDocs.add(parentJoinDoc);
joinW.addDocuments(joinDocs);
if (doDeletes && random().nextInt(30) == 7) {
toDelete.add(parentDocID);
}
}
if (!toDelete.isEmpty()) {
Query query = IntPoint.newSetQuery("blockID", toDelete);
w.deleteDocuments(query);
joinW.deleteDocuments(query);
}
final IndexReader r = w.getReader();
w.close();
final IndexReader joinR = joinW.getReader();
joinW.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
System.out.println("TEST: joinReader=" + joinR);
Bits liveDocs = MultiFields.getLiveDocs(joinR);
for (int docIDX = 0; docIDX < joinR.maxDoc(); docIDX++) {
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
}
PostingsEnum parents = MultiFields.getTermDocsEnum(joinR, "isParent", new BytesRef("x"));
System.out.println("parent docIDs:");
while (parents.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
System.out.println(" " + parents.docID());
}
}
final IndexSearcher s = newSearcher(r, false);
final IndexSearcher joinS = newSearcher(joinR);
final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
final int iters = 200 * RANDOM_MULTIPLIER;
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + (1 + iter) + " of " + iters);
}
Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
} else if (random().nextInt(3) == 2) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
final int numClauses = TestUtil.nextInt(random(), 2, 4);
boolean didMust = false;
for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
Query clause;
BooleanClause.Occur occur;
if (!didMust && random().nextBoolean()) {
occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
clause = new TermQuery(randomChildTerm(childFields[0]));
didMust = true;
} else {
occur = BooleanClause.Occur.SHOULD;
final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
}
bq.add(clause, occur);
}
childQuery = bq.build();
} else {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(randomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
if (random().nextBoolean()) {
childQuery = new RandomApproximationQuery(childQuery, random());
}
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
// To run against the block-join index:
final Query parentJoinQuery;
// Same query as parentJoinQuery, but to run against
// the fully denormalized index (so we can compare
// results):
final Query parentQuery;
if (random().nextBoolean()) {
parentQuery = childQuery;
parentJoinQuery = childJoinQuery;
} else {
// AND parent field w/ child field
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
final Term parentTerm = randomParentTerm(parentFields[0]);
if (random().nextBoolean()) {
bq.add(childJoinQuery, BooleanClause.Occur.MUST);
bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
} else {
bq.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
bq.add(childJoinQuery, BooleanClause.Occur.MUST);
}
final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq2.add(childQuery, BooleanClause.Occur.MUST);
bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
} else {
bq2.add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
bq2.add(childQuery, BooleanClause.Occur.MUST);
}
parentJoinQuery = bq.build();
parentQuery = bq2.build();
}
final Sort parentSort = getRandomSort("parent", parentFields.length);
final Sort childSort = getRandomSort("child", childFields.length);
if (VERBOSE) {
System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
}
// Merge both sorts:
final List<SortField> sortFields = new ArrayList<>(Arrays.asList(parentSort.getSort()));
sortFields.addAll(Arrays.asList(childSort.getSort()));
final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()]));
final TopDocs results = s.search(parentQuery, r.numDocs(), parentAndChildSort);
if (VERBOSE) {
System.out.println("\nTEST: normal index gets " + results.totalHits + " hits; sort=" + parentAndChildSort);
final ScoreDoc[] hits = results.scoreDocs;
for (int hitIDX = 0; hitIDX < hits.length; hitIDX++) {
final Document doc = s.doc(hits[hitIDX].doc);
//System.out.println(" score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
System.out.println(" parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
FieldDoc fd = (FieldDoc) hits[hitIDX];
if (fd.fields != null) {
System.out.print(" " + fd.fields.length + " sort values: ");
for (Object o : fd.fields) {
if (o instanceof BytesRef) {
System.out.print(((BytesRef) o).utf8ToString() + " ");
} else {
System.out.print(o + " ");
}
}
System.out.println();
}
}
}
TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
for (ScoreDoc parentHit : joinedResults.scoreDocs) {
ParentChildrenBlockJoinQuery childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
final Document parentDoc = joinS.doc(parentHit.doc);
joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
}
final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
if (VERBOSE) {
System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
if (joinResults != null) {
for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
System.out.println(" group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
for (ScoreDoc childHit : entry.getValue().scoreDocs) {
final Document doc = joinS.doc(childHit.doc);
// System.out.println(" score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
}
}
}
}
if (results.totalHits == 0) {
assertEquals(0, joinResults.size());
} else {
compareHits(r, joinR, results, joinResults);
TopDocs b = joinS.search(childJoinQuery, 10);
for (ScoreDoc hit : b.scoreDocs) {
Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
Document document = joinS.doc(hit.doc - 1);
int childId = Integer.parseInt(document.get("childID"));
//System.out.println(" hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
assertTrue(explanation.isMatch());
assertEquals(hit.score, explanation.getValue(), 0.0f);
Matcher m = Pattern.compile("Score based on ([0-9]+) child docs in range from ([0-9]+) to ([0-9]+), best match:").matcher(explanation.getDescription());
assertTrue("Block Join description not matches", m.matches());
assertTrue("Matched children not positive", Integer.parseInt(m.group(1)) > 0);
assertEquals("Wrong child range start", hit.doc - 1 - childId, Integer.parseInt(m.group(2)));
assertEquals("Wrong child range end", hit.doc - 1, Integer.parseInt(m.group(3)));
Explanation childWeightExplanation = explanation.getDetails()[0];
if ("sum of:".equals(childWeightExplanation.getDescription())) {
childWeightExplanation = childWeightExplanation.getDetails()[0];
}
assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
}
}
// Test joining in the opposite direction (parent to
// child):
// Get random query against parent documents:
final Query parentQuery2;
if (random().nextInt(3) == 2) {
final int fieldID = random().nextInt(parentFields.length);
parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
} else if (random().nextInt(3) == 2) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
final int numClauses = TestUtil.nextInt(random(), 2, 4);
boolean didMust = false;
for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++) {
Query clause;
BooleanClause.Occur occur;
if (!didMust && random().nextBoolean()) {
occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
clause = new TermQuery(randomParentTerm(parentFields[0]));
didMust = true;
} else {
occur = BooleanClause.Occur.SHOULD;
final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
}
bq.add(clause, occur);
}
parentQuery2 = bq.build();
} else {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(randomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
bq.add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])), random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
parentQuery2 = bq.build();
}
if (VERBOSE) {
System.out.println("\nTEST: top down: parentQuery2=" + parentQuery2);
}
// Maps parent query to child docs:
final ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter);
// To run against the block-join index:
Query childJoinQuery2;
// Same query as parentJoinQuery, but to run against
// the fully denormalized index (so we can compare
// results):
Query childQuery2;
if (random().nextBoolean()) {
childQuery2 = parentQuery2;
childJoinQuery2 = parentJoinQuery2;
} else {
final Term childTerm = randomChildTerm(childFields[0]);
if (random().nextBoolean()) {
// filtered case
childJoinQuery2 = parentJoinQuery2;
childJoinQuery2 = new BooleanQuery.Builder().add(childJoinQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
} else {
// AND child field w/ parent query:
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
} else {
bq.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
}
childJoinQuery2 = bq.build();
}
if (random().nextBoolean()) {
// filtered case
childQuery2 = parentQuery2;
childQuery2 = new BooleanQuery.Builder().add(childQuery2, Occur.MUST).add(new TermQuery(childTerm), Occur.FILTER).build();
} else {
final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq2.add(parentQuery2, BooleanClause.Occur.MUST);
bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
} else {
bq2.add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
bq2.add(parentQuery2, BooleanClause.Occur.MUST);
}
childQuery2 = bq2.build();
}
}
final Sort childSort2 = getRandomSort("child", childFields.length);
// Search denormalized index:
if (VERBOSE) {
System.out.println("TEST: run top down query=" + childQuery2 + " sort=" + childSort2);
}
final TopDocs results2 = s.search(childQuery2, r.numDocs(), childSort2);
if (VERBOSE) {
System.out.println(" " + results2.totalHits + " totalHits:");
for (ScoreDoc sd : results2.scoreDocs) {
final Document doc = s.doc(sd.doc);
System.out.println(" childID=" + doc.get("childID") + " parentID=" + doc.get("parentID") + " docID=" + sd.doc);
}
}
// Search join index:
if (VERBOSE) {
System.out.println("TEST: run top down join query=" + childJoinQuery2 + " sort=" + childSort2);
}
TopDocs joinResults2 = joinS.search(childJoinQuery2, joinR.numDocs(), childSort2);
if (VERBOSE) {
System.out.println(" " + joinResults2.totalHits + " totalHits:");
for (ScoreDoc sd : joinResults2.scoreDocs) {
final Document doc = joinS.doc(sd.doc);
final Document parentDoc = getParentDoc(joinR, parentsFilter, sd.doc);
System.out.println(" childID=" + doc.get("childID") + " parentID=" + parentDoc.get("parentID") + " docID=" + sd.doc);
}
}
compareChildHits(r, joinR, results2, joinResults2);
}
r.close();
joinR.close();
dir.close();
joinDir.close();
}
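Stripped of the randomization and the denormalized twin index, the block-join usage above boils down to: index each group of children with the parent document last (addDocuments), mark parents with a dedicated field, and add SortedDocValuesField copies of any fields you want to sort or group on. A minimal hedged sketch of that pattern follows; the "skill" and "name" fields and the single one-child block are assumptions, while the "isParent"/"x" marker and the query classes mirror the test.

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class BlockJoinSketch {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      List<Document> block = new ArrayList<>();
      Document child = new Document();
      child.add(new StringField("skill", "java", Field.Store.NO));
      child.add(new SortedDocValuesField("skill", new BytesRef("java"))); // sortable copy
      block.add(child);
      Document parent = new Document();
      parent.add(new StringField("isParent", "x", Field.Store.NO)); // marks the parent doc
      parent.add(new StringField("name", "resume-1", Field.Store.YES));
      block.add(parent); // the parent document must come last in the block
      writer.addDocuments(block);
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        BitSetProducer parentsFilter =
            new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
        ToParentBlockJoinQuery joinQuery = new ToParentBlockJoinQuery(
            new TermQuery(new Term("skill", "java")), parentsFilter, ScoreMode.Avg);
        TopDocs parents = searcher.search(joinQuery, 10);
        System.out.println("matching parents: " + parents.totalHits);
      }
    }
  }
}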
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
Class TestBlockJoinSorting, method testNestedSorting:
@Test
public void testNestedSorting() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
List<Document> docs = new ArrayList<>();
Document document = new Document();
document.add(new StringField("field2", "a", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("a")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "b", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("b")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "c", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("c")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "a", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
w.commit();
docs.clear();
document = new Document();
document.add(new StringField("field2", "c", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("c")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "d", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("d")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "e", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("e")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "b", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
docs.clear();
document = new Document();
document.add(new StringField("field2", "e", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("e")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "f", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("f")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "g", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("g")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "c", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
docs.clear();
document = new Document();
document.add(new StringField("field2", "g", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("g")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "h", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("h")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "i", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("i")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "d", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
w.commit();
docs.clear();
document = new Document();
document.add(new StringField("field2", "i", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("i")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "j", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("j")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "k", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("k")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "f", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
docs.clear();
document = new Document();
document.add(new StringField("field2", "k", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("k")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "l", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("l")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "m", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("m")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "g", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
docs.clear();
document = new Document();
document.add(new StringField("field2", "m", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("m")));
document.add(new StringField("filter_1", "T", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "n", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("n")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("field2", "o", Field.Store.NO));
document.add(new SortedDocValuesField("field2", new BytesRef("o")));
document.add(new StringField("filter_1", "F", Field.Store.NO));
docs.add(document);
document = new Document();
document.add(new StringField("__type", "parent", Field.Store.NO));
document.add(new StringField("field1", "i", Field.Store.NO));
docs.add(document);
w.addDocuments(docs);
w.commit();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w.w));
w.close();
BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("__type", "parent")));
CheckJoinIndex.check(searcher.getIndexReader(), parentFilter);
BitSetProducer childFilter = new QueryBitSetProducer(new PrefixQuery(new Term("field2")));
ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new PrefixQuery(new Term("field2")), parentFilter, ScoreMode.None);
// Sort by field ascending, order first
ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type.STRING, false, parentFilter, childFilter);
Sort sort = new Sort(sortField);
TopFieldDocs topDocs = searcher.search(query, 5, sort);
assertEquals(7, topDocs.totalHits);
assertEquals(5, topDocs.scoreDocs.length);
assertEquals(3, topDocs.scoreDocs[0].doc);
assertEquals("a", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
assertEquals(7, topDocs.scoreDocs[1].doc);
assertEquals("c", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
assertEquals(11, topDocs.scoreDocs[2].doc);
assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
assertEquals(15, topDocs.scoreDocs[3].doc);
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
assertEquals(19, topDocs.scoreDocs[4].doc);
assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
// Sort by field ascending, order last
sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, false, true, parentFilter, childFilter));
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
assertEquals(7, topDocs.totalHits);
assertEquals(5, topDocs.scoreDocs.length);
assertEquals(3, topDocs.scoreDocs[0].doc);
assertEquals("c", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
assertEquals(7, topDocs.scoreDocs[1].doc);
assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
assertEquals(11, topDocs.scoreDocs[2].doc);
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
assertEquals(15, topDocs.scoreDocs[3].doc);
assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
assertEquals(19, topDocs.scoreDocs[4].doc);
assertEquals("k", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
// Sort by field descending, order last
sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, parentFilter, childFilter));
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
assertEquals(7, topDocs.totalHits);
assertEquals(5, topDocs.scoreDocs.length);
assertEquals(27, topDocs.scoreDocs[0].doc);
assertEquals("o", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
assertEquals(23, topDocs.scoreDocs[1].doc);
assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
assertEquals(19, topDocs.scoreDocs[2].doc);
assertEquals("k", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
assertEquals(15, topDocs.scoreDocs[3].doc);
assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
assertEquals(11, topDocs.scoreDocs[4].doc);
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
// Sort by field descending, order last, sort filter (filter_1:T)
BitSetProducer childFilter1T = new QueryBitSetProducer(new TermQuery((new Term("filter_1", "T"))));
query = new ToParentBlockJoinQuery(new TermQuery((new Term("filter_1", "T"))), parentFilter, ScoreMode.None);
sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, parentFilter, childFilter1T));
sort = new Sort(sortField);
topDocs = searcher.search(query, 5, sort);
assertEquals(6, topDocs.totalHits);
assertEquals(5, topDocs.scoreDocs.length);
assertEquals(23, topDocs.scoreDocs[0].doc);
assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
assertEquals(27, topDocs.scoreDocs[1].doc);
assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
assertEquals(11, topDocs.scoreDocs[2].doc);
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
assertEquals(15, topDocs.scoreDocs[3].doc);
assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
assertEquals(7, topDocs.scoreDocs[4].doc);
assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, new QueryBitSetProducer(new TermQuery(new Term("__type", "another"))), childFilter1T));
searcher.getIndexReader().close();
dir.close();
}
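The test above uses ToParentBlockJoinSortField to sort matching parent documents by the SortedDocValuesField values of their children. The following is a hedged, minimal reconstruction of that setup with only two parent blocks; it mirrors the test's "field2"/"field1"/"__type" field names and the 5-argument ToParentBlockJoinSortField constructor, while the values, the RAMDirectory/StandardAnalyzer setup, and the simplified block layout are assumptions.

import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinSortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class BlockJoinSortSketch {
  static Document child(String value) {
    Document doc = new Document();
    doc.add(new StringField("field2", value, Field.Store.NO));
    doc.add(new SortedDocValuesField("field2", new BytesRef(value))); // sort key
    return doc;
  }

  static Document parent(String name) {
    Document doc = new Document();
    doc.add(new StringField("__type", "parent", Field.Store.NO));
    doc.add(new StringField("field1", name, Field.Store.YES));
    return doc;
  }

  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      writer.addDocuments(Arrays.asList(child("b"), child("c"), parent("p1")));
      writer.addDocuments(Arrays.asList(child("a"), child("d"), parent("p2")));
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        BitSetProducer parentFilter =
            new QueryBitSetProducer(new TermQuery(new Term("__type", "parent")));
        BitSetProducer childFilter =
            new QueryBitSetProducer(new PrefixQuery(new Term("field2")));
        ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(
            new PrefixQuery(new Term("field2")), parentFilter, ScoreMode.None);
        // Ascending by each parent's lowest child value of "field2",
        // matching the "ascending, order first" case in the test.
        SortField sortField = new ToParentBlockJoinSortField(
            "field2", SortField.Type.STRING, false, parentFilter, childFilter);
        TopFieldDocs topDocs = searcher.search(query, 10, new Sort(sortField));
        System.out.println("parents matched: " + topDocs.totalHits); // expected: 2, p2 first
      }
    }
  }
}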
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
Class TestLegacyFieldCache, method testDocValuesIntegration:
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
// Binary type: can be retrieved via getTerms()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER);
});
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER);
});
// Numeric type: can be retrieved via getInts() and so on
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER);
assertEquals(0, numeric.nextDoc());
assertEquals(42, numeric.longValue());
// SortedSet type: can be retrieved via getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER);
});
ir.close();
dir.close();
}
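The point of the test above is that the legacy FieldCache numerics accessor rejects BINARY, SORTED, and SORTED_SET doc values. A hedged sketch of one way a SortedDocValuesField is normally read back, through the per-leaf doc values API, is shown below; the "sorted" field name and value mirror the test, while the plain IndexWriter/RAMDirectory setup and the single-segment assumption are illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class ReadSortedDocValuesSketch {
  public static void main(String[] args) throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
      writer.addDocument(doc);
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        // Single segment here, so the first leaf holds the one document.
        LeafReader leaf = reader.leaves().get(0).reader();
        SortedDocValues values = DocValues.getSorted(leaf, "sorted");
        if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          // binaryValue() resolves the current doc's ordinal back to its BytesRef.
          System.out.println(values.binaryValue().utf8ToString()); // "sorted value"
        }
      }
    }
  }
}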