Search in sources :

Example 46 with Sort

use of org.apache.lucene.search.Sort in project lucene-solr by apache.

the class DistinctValuesCollectorTest method testSimple.

/**
 * Checks distinct-value counting per group against a small hand-built index.
 *
 * <p>Seven documents are written in two segments (a commit separates the first
 * three from the rest). One document has no count field and one has no group
 * field. Three term queries on {@code content} are then run through a
 * first-pass grouping collector followed by a distinct-values collector, and
 * the unique count values reported for each group are verified.
 */
public void testSimple() throws Exception {
    final Random rnd = random();
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, newIndexWriterConfig(new MockAnalyzer(rnd)).setMergePolicy(newLogMergePolicy()));

    // doc 0: group "1", count "1"
    Document document = new Document();
    addField(document, GROUP_FIELD, "1");
    addField(document, COUNT_FIELD, "1");
    document.add(new TextField("content", "random text", Field.Store.NO));
    document.add(new StringField("id", "1", Field.Store.NO));
    writer.addDocument(document);

    // doc 1: group "1", count "1"
    document = new Document();
    addField(document, GROUP_FIELD, "1");
    addField(document, COUNT_FIELD, "1");
    document.add(new TextField("content", "some more random text blob", Field.Store.NO));
    document.add(new StringField("id", "2", Field.Store.NO));
    writer.addDocument(document);

    // doc 2: group "1", count "2"
    document = new Document();
    addField(document, GROUP_FIELD, "1");
    addField(document, COUNT_FIELD, "2");
    document.add(new TextField("content", "some more random textual data", Field.Store.NO));
    document.add(new StringField("id", "3", Field.Store.NO));
    writer.addDocument(document);

    // Commit here so that the remaining documents land in a second segment.
    writer.commit();

    // doc 3: group "2", deliberately without a count field
    document = new Document();
    addField(document, GROUP_FIELD, "2");
    document.add(new TextField("content", "some random text", Field.Store.NO));
    document.add(new StringField("id", "4", Field.Store.NO));
    writer.addDocument(document);

    // doc 4: group "3", count "1"
    document = new Document();
    addField(document, GROUP_FIELD, "3");
    addField(document, COUNT_FIELD, "1");
    document.add(new TextField("content", "some more random text", Field.Store.NO));
    document.add(new StringField("id", "5", Field.Store.NO));
    writer.addDocument(document);

    // doc 5: group "3", count "1"
    document = new Document();
    addField(document, GROUP_FIELD, "3");
    addField(document, COUNT_FIELD, "1");
    document.add(new TextField("content", "random blob", Field.Store.NO));
    document.add(new StringField("id", "6", Field.Store.NO));
    writer.addDocument(document);

    // doc 6: deliberately without a group field, count "1"
    document = new Document();
    document.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    addField(document, COUNT_FIELD, "1");
    document.add(new StringField("id", "6", Field.Store.NO));
    writer.addDocument(document);

    final IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();

    // Orders group counts by group value, placing a null group value first.
    final Comparator<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> byGroupValue = (left, right) -> {
        if (left.groupValue == null) {
            return right.groupValue == null ? 0 : -1;
        }
        if (right.groupValue == null) {
            return 1;
        }
        return left.groupValue.compareTo(right.groupValue);
    };

    // ---- content:random -> groups null, "1", "2", "3"
    FirstPassGroupingCollector<Comparable<Object>> firstPass = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    searcher.search(new TermQuery(new Term("content", "random")), firstPass);
    DistinctValuesCollector<Comparable<Object>, Comparable<Object>> distinctCollector = createDistinctCountCollector(firstPass, COUNT_FIELD);
    searcher.search(new TermQuery(new Term("content", "random")), distinctCollector);
    List<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> groupCounts = distinctCollector.getGroups();
    groupCounts.sort(byGroupValue);
    assertEquals(4, groupCounts.size());

    // null group: single count value "1"
    compareNull(groupCounts.get(0).groupValue);
    List<Comparable<?>> distinctCounts = new ArrayList<>(groupCounts.get(0).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compare("1", distinctCounts.get(0));

    // group "1": count values "1" and "2"
    compare("1", groupCounts.get(1).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(1).uniqueValues);
    distinctCounts.sort(nullComparator);
    assertEquals(2, distinctCounts.size());
    compare("1", distinctCounts.get(0));
    compare("2", distinctCounts.get(1));

    // group "2": its only document has no count field, so the single value is null
    compare("2", groupCounts.get(2).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(2).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compareNull(distinctCounts.get(0));

    // group "3": single count value "1"
    compare("3", groupCounts.get(3).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(3).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compare("1", distinctCounts.get(0));

    // ---- content:some -> groups "1", "2", "3"
    firstPass = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    searcher.search(new TermQuery(new Term("content", "some")), firstPass);
    distinctCollector = createDistinctCountCollector(firstPass, COUNT_FIELD);
    searcher.search(new TermQuery(new Term("content", "some")), distinctCollector);
    groupCounts = distinctCollector.getGroups();
    groupCounts.sort(byGroupValue);
    assertEquals(3, groupCounts.size());

    // group "1": count values "1" and "2"
    compare("1", groupCounts.get(0).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(0).uniqueValues);
    assertEquals(2, distinctCounts.size());
    distinctCounts.sort(nullComparator);
    compare("1", distinctCounts.get(0));
    compare("2", distinctCounts.get(1));

    // group "2": single null count value
    compare("2", groupCounts.get(1).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(1).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compareNull(distinctCounts.get(0));

    // group "3": single count value "1"
    compare("3", groupCounts.get(2).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(2).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compare("1", distinctCounts.get(0));

    // ---- content:blob -> groups "1", "3"
    firstPass = createRandomFirstPassCollector(new Sort(), GROUP_FIELD, 10);
    searcher.search(new TermQuery(new Term("content", "blob")), firstPass);
    distinctCollector = createDistinctCountCollector(firstPass, COUNT_FIELD);
    searcher.search(new TermQuery(new Term("content", "blob")), distinctCollector);
    groupCounts = distinctCollector.getGroups();
    groupCounts.sort(byGroupValue);
    assertEquals(2, groupCounts.size());

    // group "1": only one of its documents contains "blob", hence one count value
    compare("1", groupCounts.get(0).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(0).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compare("1", distinctCounts.get(0));

    // group "3": single count value "1"
    compare("3", groupCounts.get(1).groupValue);
    distinctCounts = new ArrayList<>(groupCounts.get(1).uniqueValues);
    assertEquals(1, distinctCounts.size());
    compare("1", distinctCounts.get(0));

    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Term(org.apache.lucene.index.Term) TestUtil(org.apache.lucene.util.TestUtil) HashMap(java.util.HashMap) Random(java.util.Random) ArrayList(java.util.ArrayList) MutableValue(org.apache.lucene.util.mutable.MutableValue) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) BytesRefFieldSource(org.apache.lucene.queries.function.valuesource.BytesRefFieldSource) Document(org.apache.lucene.document.Document) Locale(java.util.Locale) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) SortField(org.apache.lucene.search.SortField) MutableValueStr(org.apache.lucene.util.mutable.MutableValueStr) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Collection(java.util.Collection) DirectoryReader(org.apache.lucene.index.DirectoryReader) Set(java.util.Set) IOException(java.io.IOException) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Comparator(java.util.Comparator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 47 with Sort

use of org.apache.lucene.search.Sort in project lucene-solr by apache.

the class DistinctValuesCollectorTest method testRandom.

/**
 * Randomized check of the distinct-values collector.
 *
 * <p>For a random number of freshly created index contexts, runs 100 searches
 * with a randomly chosen content term and group limit. Each search compares the
 * groups returned by the collector, position by position, with the result of
 * {@code createExpectedResult}. Unique values are sorted with
 * {@code nullComparator} on both sides before being compared.
 */
public void testRandom() throws Exception {
    final Random rnd = random();
    final int runCount = TestUtil.nextInt(rnd, 3, 6);
    for (int run = 0; run < runCount; run++) {
        final IndexContext ctx = createIndexContext();
        for (int attempt = 0; attempt < 100; attempt++) {
            final IndexSearcher searcher = newSearcher(ctx.indexReader);
            final String searchTerm = ctx.contentStrings[rnd.nextInt(ctx.contentStrings.length)];
            final Sort sortById = new Sort(new SortField("id", SortField.Type.STRING));
            final int groupLimit = 1 + rnd.nextInt(10);

            final List<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> expectedGroups = createExpectedResult(ctx, searchTerm, sortById, groupLimit);

            // First pass selects the top groups; second pass counts distinct values within them.
            final FirstPassGroupingCollector<Comparable<Object>> firstPass = createRandomFirstPassCollector(sortById, GROUP_FIELD, groupLimit);
            searcher.search(new TermQuery(new Term("content", searchTerm)), firstPass);
            final DistinctValuesCollector<Comparable<Object>, Comparable<Object>> distinctCollector = createDistinctCountCollector(firstPass, COUNT_FIELD);
            searcher.search(new TermQuery(new Term("content", searchTerm)), distinctCollector);
            @SuppressWarnings("unchecked") final List<DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>>> actualGroups = distinctCollector.getGroups();

            if (VERBOSE) {
                System.out.println("Index iter=" + run);
                System.out.println("Search iter=" + attempt);
                System.out.println("1st pass collector class name=" + firstPass.getClass().getName());
                System.out.println("2nd pass collector class name=" + distinctCollector.getClass().getName());
                System.out.println("Search term=" + searchTerm);
                System.out.println("1st pass groups=" + firstPass.getTopGroups(0, false));
                System.out.println("Expected:");
                printGroups(expectedGroups);
                System.out.println("Actual:");
                printGroups(actualGroups);
            }

            assertEquals(expectedGroups.size(), actualGroups.size());
            for (int groupIdx = 0; groupIdx < expectedGroups.size(); groupIdx++) {
                final DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>> wanted = expectedGroups.get(groupIdx);
                final DistinctValuesCollector.GroupCount<Comparable<Object>, Comparable<Object>> got = actualGroups.get(groupIdx);
                assertValues(wanted.groupValue, got.groupValue);
                assertEquals(wanted.uniqueValues.size(), got.uniqueValues.size());

                // Bring both value collections into the same order before the element-wise check.
                final List<Comparable<?>> wantedValues = new ArrayList<>(wanted.uniqueValues);
                wantedValues.sort(nullComparator);
                final List<Comparable<?>> gotValues = new ArrayList<>(got.uniqueValues);
                gotValues.sort(nullComparator);
                for (int valueIdx = 0; valueIdx < wantedValues.size(); valueIdx++) {
                    assertValues(wantedValues.get(valueIdx), gotValues.get(valueIdx));
                }
            }
        }
        ctx.indexReader.close();
        ctx.directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term) Random(java.util.Random) Sort(org.apache.lucene.search.Sort)

Example 48 with Sort

use of org.apache.lucene.search.Sort in project lucene-solr by apache.

the class AllGroupHeadsCollectorTest method testBasic.

/**
 * Checks group-head selection on a small hand-built index.
 *
 * <p>Eight documents are written in two segments; the last two carry no
 * {@code author} field. For several term queries and within-group sorts
 * (numeric descending, string descending, string ascending), the group heads
 * are retrieved both as an int array and as a bit set, and compared with the
 * expected Lucene doc ids.
 */
public void testBasic() throws Exception {
    final String groupField = "author";
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    final DocValuesType groupValueType = DocValuesType.SORTED;

    // doc 0: author1
    Document document = new Document();
    addGroupField(document, groupField, "author1", groupValueType);
    document.add(newTextField("content", "random text", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 1));
    document.add(new SortedDocValuesField("id_2", new BytesRef("1")));
    writer.addDocument(document);

    // doc 1: author1
    document = new Document();
    addGroupField(document, groupField, "author1", groupValueType);
    document.add(newTextField("content", "some more random text blob", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 2));
    document.add(new SortedDocValuesField("id_2", new BytesRef("2")));
    writer.addDocument(document);

    // doc 2: author1
    document = new Document();
    addGroupField(document, groupField, "author1", groupValueType);
    document.add(newTextField("content", "some more random textual data", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 3));
    document.add(new SortedDocValuesField("id_2", new BytesRef("3")));
    writer.addDocument(document);

    // Commit here so that the remaining documents land in a second segment.
    writer.commit();

    // doc 3: author2
    document = new Document();
    addGroupField(document, groupField, "author2", groupValueType);
    document.add(newTextField("content", "some random text", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 4));
    document.add(new SortedDocValuesField("id_2", new BytesRef("4")));
    writer.addDocument(document);

    // doc 4: author3
    document = new Document();
    addGroupField(document, groupField, "author3", groupValueType);
    document.add(newTextField("content", "some more random text", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 5));
    document.add(new SortedDocValuesField("id_2", new BytesRef("5")));
    writer.addDocument(document);

    // doc 5: author3
    document = new Document();
    addGroupField(document, groupField, "author3", groupValueType);
    document.add(newTextField("content", "random blob", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 6));
    document.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    writer.addDocument(document);

    // doc 6: no author field (shares its id values with doc 5)
    document = new Document();
    document.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 6));
    document.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    writer.addDocument(document);

    // doc 7: no author field
    document = new Document();
    document.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    document.add(new NumericDocValuesField("id_1", 7));
    document.add(new SortedDocValuesField("id_2", new BytesRef("7")));
    writer.addDocument(document);

    final IndexReader reader = writer.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    writer.close();
    final int maxDoc = reader.maxDoc();

    // Numeric sort on id_1, descending.
    final Sort byNumericIdDesc = new Sort(new SortField("id_1", SortField.Type.INT, true));

    AllGroupHeadsCollector<?> headsCollector = createRandomCollector(groupField, byNumericIdDesc);
    searcher.search(new TermQuery(new Term("content", "random")), headsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, headsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, headsCollector.retrieveGroupHeads(maxDoc), maxDoc));

    headsCollector = createRandomCollector(groupField, byNumericIdDesc);
    searcher.search(new TermQuery(new Term("content", "some")), headsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 4 }, headsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 4 }, headsCollector.retrieveGroupHeads(maxDoc), maxDoc));

    headsCollector = createRandomCollector(groupField, byNumericIdDesc);
    searcher.search(new TermQuery(new Term("content", "blob")), headsCollector);
    assertTrue(arrayContains(new int[] { 1, 5 }, headsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 1, 5 }, headsCollector.retrieveGroupHeads(maxDoc), maxDoc));

    // A STRING sort type triggers a different implementation; descending on id_2.
    final Sort byStringIdDesc = new Sort(new SortField("id_2", SortField.Type.STRING, true));
    headsCollector = createRandomCollector(groupField, byStringIdDesc);
    searcher.search(new TermQuery(new Term("content", "random")), headsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, headsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, headsCollector.retrieveGroupHeads(maxDoc), maxDoc));

    // Same field, ascending: the expected heads become docs 0, 3 and 4, plus doc 6
    // for the documents that have no author field.
    final Sort byStringIdAsc = new Sort(new SortField("id_2", SortField.Type.STRING, false));
    headsCollector = createRandomCollector(groupField, byStringIdAsc);
    searcher.search(new TermQuery(new Term("content", "random")), headsCollector);
    assertTrue(arrayContains(new int[] { 0, 3, 4, 6 }, headsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 0, 3, 4, 6 }, headsCollector.retrieveGroupHeads(maxDoc), maxDoc));

    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) DocValuesType(org.apache.lucene.index.DocValuesType) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 49 with Sort

use of org.apache.lucene.search.Sort in project lucene-solr by apache.

the class AllGroupHeadsCollectorTest method testRandom.

/**
 * Randomized check of the all-group-heads collector.
 *
 * <p>Each of a random number of iterations builds a fresh index of random
 * documents (some without a group value), records the score every document
 * gets for its "realN" content term, and then runs 100 random searches. For
 * each search the group heads returned by the collector are translated to the
 * test's own document ids and compared with the result of
 * {@code createExpectedGroupHeads}.
 */
public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        // Pool of random, non-empty values; used below for group values and for sort1/sort2 values.
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        // Every content string starts with "real0", "real1" or "real2", followed by 0-9 "fake" tokens.
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        // NOTE(review): valueType is not read anywhere in this method.
        DocValuesType valueType = DocValuesType.SORTED;
        // Two reusable documents that share the same Field instances; docNoGroup
        // simply lacks the "group" field. Field values are overwritten per document below.
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field valuesField = null;
        valuesField = new SortedDocValuesField("group", new BytesRef());
        doc.add(valuesField);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
        doc.add(sort3);
        docNoGroup.add(sort3);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            // The fifth argument is the zero-padded document index; it is printed as sort3 below.
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), new BytesRef(String.format(Locale.ROOT, "%05d", i)), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString() + " sort3=" + groupDoc.sort3.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            // The group field is only updated when there is a group value; documents
            // without one are indexed via docNoGroup, which does not contain that field.
            if (groupDoc.group != null) {
                valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
            }
            sort1.setBytesValue(groupDoc.sort1);
            sort2.setBytesValue(groupDoc.sort2);
            sort3.setBytesValue(groupDoc.sort3);
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final DirectoryReader r = w.getReader();
        w.close();
        // Build the two-way mapping between Lucene doc ids and the test's own "id" doc value.
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        final int[] docIDToFieldId = new int[numDocs];
        final int[] fieldIdToDocID = new int[numDocs];
        for (int i = 0; i < numDocs; i++) {
            // Every Lucene doc id in [0, numDocs) is expected to have an "id" value.
            assertEquals(i, values.nextDoc());
            int fieldId = (int) values.longValue();
            docIDToFieldId[i] = fieldId;
            fieldIdToDocID[fieldId] = i;
        }
        final IndexSearcher s = newSearcher(r);
        // Record each document's score for the "realN" term it contains.
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToFieldId[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                assertEquals(gd.id, idValue);
                seenIDs.add(idValue);
                // The score must not have been set yet
                // (presumably GroupDoc.score starts at 0 - confirm against GroupDoc).
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
            }
        }
        // make sure every document was seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        // make sure scores are sane
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("TEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            boolean sortByScoreOnly = random().nextBoolean();
            Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
            AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
            s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
            int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
            int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
            // The actual group heads contains Lucene ids. Need to change them into our id value.
            for (int i = 0; i < actualGroupHeads.length; i++) {
                actualGroupHeads[i] = docIDToFieldId[actualGroupHeads[i]];
            }
            // Allows us the easily iterate and assert the actual and expected results.
            Arrays.sort(expectedGroupHeads);
            Arrays.sort(actualGroupHeads);
            if (VERBOSE) {
                System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
                System.out.println("Sort within group: " + sortWithinGroup);
                System.out.println("Num group: " + numGroups);
                System.out.println("Num doc: " + numDocs);
                System.out.println("\n=== Expected: \n");
                for (int expectedDocId : expectedGroupHeads) {
                    GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                    String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(), expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId));
                }
                System.out.println("\n=== Actual: \n");
                for (int actualDocId : actualGroupHeads) {
                    GroupDoc actualGroupDoc = groupDocs[actualDocId];
                    String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(), actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId));
                }
                System.out.println("\n===================================================================================");
            }
            assertArrayEquals(expectedGroupHeads, actualGroupHeads);
        }
        r.close();
        dir.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) DocValuesType(org.apache.lucene.index.DocValuesType) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 50 with Sort

use of org.apache.lucene.search.Sort in project lucene-solr by apache.

the class TestBlockJoinSorting method testNestedSorting.

/**
 * Checks how {@code ToParentBlockJoinSortField} orders parent documents by the "field2" values
 * of their child documents.
 *
 * <p>The index holds seven blocks, each consisting of three child documents followed by one
 * parent document, so the parents are expected at doc ids 3, 7, 11, 15, 19, 23 and 27. Commits
 * are issued after blocks 1, 4 and 7. Four sorted searches are then run and the returned parent
 * doc ids and sort values are asserted:
 * <ol>
 *   <li>ascending, using the smallest child value of each block;</li>
 *   <li>ascending, using the largest child value of each block;</li>
 *   <li>descending, using the largest child value of each block;</li>
 *   <li>descending, restricted to children with {@code filter_1:T}.</li>
 * </ol>
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testNestedSorting() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    List<Document> docs = new ArrayList<>();
    // Block 1: children a, b, c (all filter_1:T); parent expected at doc 3.
    docs.add(newChildDocument("a", "T"));
    docs.add(newChildDocument("b", "T"));
    docs.add(newChildDocument("c", "T"));
    docs.add(newParentDocument("a"));
    w.addDocuments(docs);
    w.commit();
    docs.clear();
    // Block 2: children c, d, e (all filter_1:T); parent expected at doc 7.
    docs.add(newChildDocument("c", "T"));
    docs.add(newChildDocument("d", "T"));
    docs.add(newChildDocument("e", "T"));
    docs.add(newParentDocument("b"));
    w.addDocuments(docs);
    docs.clear();
    // Block 3: children e, f, g (all filter_1:T); parent expected at doc 11.
    docs.add(newChildDocument("e", "T"));
    docs.add(newChildDocument("f", "T"));
    docs.add(newChildDocument("g", "T"));
    docs.add(newParentDocument("c"));
    w.addDocuments(docs);
    docs.clear();
    // Block 4: children g (T), h (F), i (F); parent expected at doc 15.
    docs.add(newChildDocument("g", "T"));
    docs.add(newChildDocument("h", "F"));
    docs.add(newChildDocument("i", "F"));
    docs.add(newParentDocument("d"));
    w.addDocuments(docs);
    w.commit();
    docs.clear();
    // Block 5: children i, j, k (all filter_1:F); parent expected at doc 19.
    docs.add(newChildDocument("i", "F"));
    docs.add(newChildDocument("j", "F"));
    docs.add(newChildDocument("k", "F"));
    docs.add(newParentDocument("f"));
    w.addDocuments(docs);
    docs.clear();
    // Block 6: children k, l, m (all filter_1:T); parent expected at doc 23.
    docs.add(newChildDocument("k", "T"));
    docs.add(newChildDocument("l", "T"));
    docs.add(newChildDocument("m", "T"));
    docs.add(newParentDocument("g"));
    w.addDocuments(docs);
    docs.clear();
    // Block 7: children m (T), n (F), o (F); parent expected at doc 27.
    docs.add(newChildDocument("m", "T"));
    docs.add(newChildDocument("n", "F"));
    docs.add(newChildDocument("o", "F"));
    docs.add(newParentDocument("i"));
    w.addDocuments(docs);
    w.commit();
    // Open the reader from the wrapped writer (w.w) before closing the writer.
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w.w));
    w.close();
    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("__type", "parent")));
    CheckJoinIndex.check(searcher.getIndexReader(), parentFilter);
    // A prefix query with an empty prefix on "field2" is used to select the child documents.
    BitSetProducer childFilter = new QueryBitSetProducer(new PrefixQuery(new Term("field2")));
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new PrefixQuery(new Term("field2")), parentFilter, ScoreMode.None);
    // Sort by field ascending, order first
    // (expected sort values are the smallest child value of each block: a, c, e, g, i)
    ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type.STRING, false, parentFilter, childFilter);
    Sort sort = new Sort(sortField);
    TopFieldDocs topDocs = searcher.search(query, 5, sort);
    assertEquals(7, topDocs.totalHits);
    assertEquals(5, topDocs.scoreDocs.length);
    assertEquals(3, topDocs.scoreDocs[0].doc);
    assertEquals("a", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
    assertEquals(7, topDocs.scoreDocs[1].doc);
    assertEquals("c", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
    assertEquals(11, topDocs.scoreDocs[2].doc);
    assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
    assertEquals(15, topDocs.scoreDocs[3].doc);
    assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
    assertEquals(19, topDocs.scoreDocs[4].doc);
    assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
    // Sort by field ascending, order last
    // (expected sort values are the largest child value of each block: c, e, g, i, k)
    // NOTE(review): notEqual is defined outside this method; presumably it asserts that the
    // newly built sort field differs from the previous one and returns the new one.
    sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, false, true, parentFilter, childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.search(query, 5, sort);
    assertEquals(7, topDocs.totalHits);
    assertEquals(5, topDocs.scoreDocs.length);
    assertEquals(3, topDocs.scoreDocs[0].doc);
    assertEquals("c", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
    assertEquals(7, topDocs.scoreDocs[1].doc);
    assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
    assertEquals(11, topDocs.scoreDocs[2].doc);
    assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
    assertEquals(15, topDocs.scoreDocs[3].doc);
    assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
    assertEquals(19, topDocs.scoreDocs[4].doc);
    assertEquals("k", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
    // Sort by field descending, order last
    // (parents come back from the last block backwards with their largest child value: o, m, k, i, g)
    sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, parentFilter, childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.search(query, 5, sort);
    // Expected value first, consistent with every other assertion in this test.
    assertEquals(7, topDocs.totalHits);
    assertEquals(5, topDocs.scoreDocs.length);
    assertEquals(27, topDocs.scoreDocs[0].doc);
    assertEquals("o", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
    assertEquals(23, topDocs.scoreDocs[1].doc);
    assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
    assertEquals(19, topDocs.scoreDocs[2].doc);
    assertEquals("k", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
    assertEquals(15, topDocs.scoreDocs[3].doc);
    assertEquals("i", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
    assertEquals(11, topDocs.scoreDocs[4].doc);
    assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
    // Sort by field descending, order last, sort filter (filter_1:T)
    // Block 5 has no filter_1:T child, so only six parents are expected to match. Blocks 4 and 7
    // each have a single T child (g and m), which ties them with blocks 3 and 6 respectively;
    // the assertions below expect the lower doc id first within each tie.
    BitSetProducer childFilter1T = new QueryBitSetProducer(new TermQuery((new Term("filter_1", "T"))));
    query = new ToParentBlockJoinQuery(new TermQuery((new Term("filter_1", "T"))), parentFilter, ScoreMode.None);
    sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, parentFilter, childFilter1T));
    sort = new Sort(sortField);
    topDocs = searcher.search(query, 5, sort);
    assertEquals(6, topDocs.totalHits);
    assertEquals(5, topDocs.scoreDocs.length);
    assertEquals(23, topDocs.scoreDocs[0].doc);
    assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).utf8ToString());
    assertEquals(27, topDocs.scoreDocs[1].doc);
    assertEquals("m", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).utf8ToString());
    assertEquals(11, topDocs.scoreDocs[2].doc);
    assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).utf8ToString());
    assertEquals(15, topDocs.scoreDocs[3].doc);
    assertEquals("g", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).utf8ToString());
    assertEquals(7, topDocs.scoreDocs[4].doc);
    assertEquals("e", ((BytesRef) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).utf8ToString());
    // Same sort field but with a different parent filter; the result is not used for a search,
    // only passed through notEqual.
    sortField = notEqual(sortField, () -> new ToParentBlockJoinSortField("field2", SortField.Type.STRING, true, new QueryBitSetProducer(new TermQuery(new Term("__type", "another"))), childFilter1T));
    searcher.getIndexReader().close();
    dir.close();
}

/**
 * Builds a child document carrying "field2" both as an unstored string field and as sorted doc
 * values, plus an unstored "filter_1" string field.
 */
private static Document newChildDocument(String field2, String filter1) {
    Document document = new Document();
    document.add(new StringField("field2", field2, Field.Store.NO));
    document.add(new SortedDocValuesField("field2", new BytesRef(field2)));
    document.add(new StringField("filter_1", filter1, Field.Store.NO));
    return document;
}

/**
 * Builds a parent document marked with {@code __type:parent} and the given "field1" value.
 */
private static Document newParentDocument(String field1) {
    Document document = new Document();
    document.add(new StringField("__type", "parent", Field.Store.NO));
    document.add(new StringField("field1", field1, Field.Store.NO));
    return document;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PrefixQuery(org.apache.lucene.search.PrefixQuery) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) Test(org.junit.Test)

Aggregations

Sort (org.apache.lucene.search.Sort) 244; SortField (org.apache.lucene.search.SortField) 181; Document (org.apache.lucene.document.Document) 139; Directory (org.apache.lucene.store.Directory) 129; IndexSearcher (org.apache.lucene.search.IndexSearcher) 108; TopDocs (org.apache.lucene.search.TopDocs) 92; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery) 86; IndexReader (org.apache.lucene.index.IndexReader) 72; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 72; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 61; SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField) 56; SortedSetSortField (org.apache.lucene.search.SortedSetSortField) 51; TermQuery (org.apache.lucene.search.TermQuery) 50; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField) 42; Query (org.apache.lucene.search.Query) 41; ArrayList (java.util.ArrayList) 37; Term (org.apache.lucene.index.Term) 36; SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 35; BytesRef (org.apache.lucene.util.BytesRef) 32; TopFieldDocs (org.apache.lucene.search.TopFieldDocs) 30