Search in sources :

Example 11 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project neo4j by neo4j.

The method getTopDocs of the class DocValuesCollector.

/**
 * Builds the top {@code size} hits ordered by {@code sort}.
 *
 * <p>When {@code sort} is the very same instance as {@link Sort#RELEVANCE} a
 * {@link TopScoreDocCollector} is used; for any other sort a {@link TopFieldCollector}
 * is created instead. In both cases the collector is handed to {@code replayTo} before
 * its top docs are read.
 *
 * @param sort the requested ordering
 * @param size maximum number of hits to keep
 * @return the top docs reported by the chosen collector
 * @throws IOException propagated from {@code replayTo}
 */
private TopDocs getTopDocs(Sort sort, int size) throws IOException {
    // Identity comparison on purpose: only the shared RELEVANCE constant takes the score-only path.
    if (sort != Sort.RELEVANCE) {
        TopFieldCollector fieldCollector = TopFieldCollector.create(sort, size, false, true, false);
        replayTo(fieldCollector);
        return fieldCollector.topDocs();
    }
    TopScoreDocCollector scoreCollector = TopScoreDocCollector.create(size);
    replayTo(scoreCollector);
    return scoreCollector.topDocs();
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector)

Example 12 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project lucene-solr by apache.

The method testSort of the class TestSort.

/**
 * Randomized check that {@link TopFieldCollector} orders hits on the string fields "f" and
 * "f2" the same way as an independently sorted list of the documents that were collected.
 *
 * <p>For each of {@code iter} rounds an index of {@code ndocs} documents is rebuilt, each
 * document randomly carrying a value for "f" and/or "f2". Then {@code qiter} searches are
 * run with a random filter and a randomly composed {@link Sort}; every collected document
 * is also recorded, sorted with a hand-written comparator, and compared position by
 * position with the collector's top docs.
 *
 * <p>{@code iter}, {@code ndocs}, {@code maxval}, {@code commitCount}, {@code qiter},
 * {@code r}, {@code log}, {@code randSet} and {@code MyDoc} are defined outside this method.
 *
 * @throws Exception if indexing, searching or an assertion fails
 */
public void testSort() throws Exception {
    Directory dir = new RAMDirectory();
    // The two Field instances are reused for every document; their value is reset via
    // setStringValue(...) right before each add.
    Field f = new StringField("f", "0", Field.Store.NO);
    Field f2 = new StringField("f2", "0", Field.Store.NO);
    for (int iterCnt = 0; iterCnt < iter; iterCnt++) {
        // OpenMode.CREATE is requested on every round, so the writer does not append to the previous round's index.
        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new SimpleAnalyzer()).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
        // mydocs[i] mirrors the i-th added document (its id and the values it was given, if any).
        final MyDoc[] mydocs = new MyDoc[ndocs];
        // NOTE(review): despite the names, a value is *set* when the draw falls below these
        // percentages; at 50 both readings give the same distribution.
        int v1EmptyPercent = 50;
        int v2EmptyPercent = 50;
        int commitCountdown = commitCount;
        for (int i = 0; i < ndocs; i++) {
            MyDoc mydoc = new MyDoc();
            mydoc.doc = i;
            mydocs[i] = mydoc;
            Document document = new Document();
            if (r.nextInt(100) < v1EmptyPercent) {
                mydoc.val = Integer.toString(r.nextInt(maxval));
                f.setStringValue(mydoc.val);
                document.add(f);
            }
            if (r.nextInt(100) < v2EmptyPercent) {
                mydoc.val2 = Integer.toString(r.nextInt(maxval));
                f2.setStringValue(mydoc.val2);
                document.add(f2);
            }
            iw.addDocument(document);
            // Commit after every commitCount documents; the assertion further down requires
            // the resulting reader to have more than one leaf.
            if (--commitCountdown <= 0) {
                commitCountdown = commitCount;
                iw.commit();
            }
        }
        iw.close();
        // Both fields were indexed as plain StringFields, so the reader is wrapped to expose
        // them with the SORTED uninverting type.
        Map<String, UninvertingReader.Type> mapping = new HashMap<>();
        mapping.put("f", UninvertingReader.Type.SORTED);
        mapping.put("f2", UninvertingReader.Type.SORTED);
        DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
        IndexSearcher searcher = new IndexSearcher(reader);
        // System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
        assertTrue(reader.leaves().size() > 1);
        for (int i = 0; i < qiter; i++) {
            // Filter that, per leaf, returns randSet(maxDoc) restricted by acceptDocs.
            // equals/hashCode are identity based, so every instance is distinct.
            Filter filt = new Filter() {

                @Override
                public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) {
                    return BitsFilteredDocIdSet.wrap(randSet(context.reader().maxDoc()), acceptDocs);
                }

                @Override
                public String toString(String field) {
                    return "TestSortFilter";
                }

                @Override
                public boolean equals(Object other) {
                    return other == this;
                }

                @Override
                public int hashCode() {
                    return System.identityHashCode(this);
                }
            };
            // Number of hits to keep: between 1 and (ndocs >> 3) + 1 inclusive.
            int top = r.nextInt((ndocs >> 3) + 1) + 1;
            // With luceneSort both "missing" flags are false; otherwise exactly one of
            // sortMissingLast / sortMissingFirst is true.
            final boolean luceneSort = r.nextBoolean();
            final boolean sortMissingLast = !luceneSort && r.nextBoolean();
            final boolean sortMissingFirst = !luceneSort && !sortMissingLast;
            final boolean reverse = r.nextBoolean();
            List<SortField> sfields = new ArrayList<>();
            // The settings for the secondary sort on "f2" are drawn even when 'secondary' is false.
            final boolean secondary = r.nextBoolean();
            final boolean luceneSort2 = r.nextBoolean();
            final boolean sortMissingLast2 = !luceneSort2 && r.nextBoolean();
            final boolean sortMissingFirst2 = !luceneSort2 && !sortMissingLast2;
            final boolean reverse2 = r.nextBoolean();
            // Randomly put a SCORE sort field in front of the string sort fields ...
            if (r.nextBoolean())
                sfields.add(new SortField(null, SortField.Type.SCORE));
            // hit both use-cases of sort-missing-last
            sfields.add(Sorting.getStringSortField("f", reverse, sortMissingLast, sortMissingFirst));
            if (secondary) {
                sfields.add(Sorting.getStringSortField("f2", reverse2, sortMissingLast2, sortMissingFirst2));
            }
            // ... and/or behind them. The expected-order comparator below ignores scores.
            if (r.nextBoolean())
                sfields.add(new SortField(null, SortField.Type.SCORE));
            Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
            // Stand-in for a missing value in the expected-order comparator: "" compares below
            // every generated value, "zzz" compares above them (values are decimal digit strings).
            // The choice accounts for 'reverse', which negates the comparison afterwards.
            final String nullRep = luceneSort || sortMissingFirst && !reverse || sortMissingLast && reverse ? "" : "zzz";
            final String nullRep2 = luceneSort2 || sortMissingFirst2 && !reverse2 || sortMissingLast2 && reverse2 ? "" : "zzz";
            boolean trackScores = r.nextBoolean();
            boolean trackMaxScores = r.nextBoolean();
            // NOTE(review): scoreInOrder is never read in this method, but the draw still
            // advances r; removing it would change every subsequent random value.
            boolean scoreInOrder = r.nextBoolean();
            final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores);
            final List<MyDoc> collectedDocs = new ArrayList<>();
            // delegate and collect docs ourselves
            Collector myCollector = new FilterCollector(topCollector) {

                @Override
                public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
                    final int docBase = context.docBase;
                    return new FilterLeafCollector(super.getLeafCollector(context)) {

                        @Override
                        public void collect(int doc) throws IOException {
                            super.collect(doc);
                            // 'doc' is leaf-relative; docBase + doc indexes into mydocs.
                            // Assumes the index-wide doc id equals the insertion index - TODO confirm.
                            collectedDocs.add(mydocs[docBase + doc]);
                        }
                    };
                }
            };
            searcher.search(filt, myCollector);
            // Expected order: primary key val, then val2 when 'secondary' is set, then doc id
            // as the final tie-break; missing values are replaced by nullRep / nullRep2.
            Collections.sort(collectedDocs, (o1, o2) -> {
                String v1 = o1.val == null ? nullRep : o1.val;
                String v2 = o2.val == null ? nullRep : o2.val;
                int cmp = v1.compareTo(v2);
                if (reverse)
                    cmp = -cmp;
                if (cmp != 0)
                    return cmp;
                if (secondary) {
                    v1 = o1.val2 == null ? nullRep2 : o1.val2;
                    v2 = o2.val2 == null ? nullRep2 : o2.val2;
                    cmp = v1.compareTo(v2);
                    if (reverse2)
                        cmp = -cmp;
                }
                // The doc-id tie-break is never reversed.
                cmp = cmp == 0 ? o1.doc - o2.doc : cmp;
                return cmp;
            });
            TopDocs topDocs = topCollector.topDocs();
            ScoreDoc[] sdocs = topDocs.scoreDocs;
            // The j-th hit of the collector must be the j-th document of the expected order.
            for (int j = 0; j < sdocs.length; j++) {
                int id = sdocs[j].doc;
                if (id != collectedDocs.get(j).doc) {
                    // Log the sort settings and expected list before the assertion below fails.
                    log.error("Error at pos " + j + "\n\tsortMissingFirst=" + sortMissingFirst + " sortMissingLast=" + sortMissingLast + " reverse=" + reverse + "\n\tEXPECTED=" + collectedDocs);
                }
                assertEquals(id, collectedDocs.get(j).doc);
            }
        }
        reader.close();
    }
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) StringField(org.apache.lucene.document.StringField) SchemaField(org.apache.solr.schema.SchemaField) SortField(org.apache.lucene.search.SortField) Field(org.apache.lucene.document.Field) LeafCollector(org.apache.lucene.search.LeafCollector) FilterLeafCollector(org.apache.lucene.search.FilterLeafCollector) FilterCollector(org.apache.lucene.search.FilterCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Sort(org.apache.lucene.search.Sort) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) DirectoryReader(org.apache.lucene.index.DirectoryReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Type(org.apache.lucene.search.SortField.Type) FilterCollector(org.apache.lucene.search.FilterCollector) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) FilterLeafCollector(org.apache.lucene.search.FilterLeafCollector) Bits(org.apache.lucene.util.Bits) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 13 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project lucene-solr by apache.

The method doLogic of the class ReadTask.

/**
 * Runs the configured read workload once: optional warm-up over all live documents,
 * optional search, and optional printing / traversal of the hits.
 *
 * <p>The reader is either opened here (when the run data has no searcher) or borrowed
 * from the shared searcher, which hands over an extra reference. In both cases the
 * reader is released in a {@code finally} block, so it is not leaked when warm-up or
 * search throws.
 *
 * @return a work count: one per document loaded during warm-up, one for the search
 *         itself, plus whatever {@code withTopDocs} reports for the hits
 * @throws Exception if opening the reader, loading documents or searching fails
 */
@Override
public int doLogic() throws Exception {
    int res = 0;
    // open reader or use existing one
    // (will incRef the reader)
    IndexSearcher searcher = getRunData().getIndexSearcher();
    IndexReader reader;
    final boolean closeSearcher;
    if (searcher == null) {
        // open our own reader
        Directory dir = getRunData().getDirectory();
        reader = DirectoryReader.open(dir);
        searcher = new IndexSearcher(reader);
        closeSearcher = true;
    } else {
        // use existing one; this passes +1 ref to us
        reader = searcher.getIndexReader();
        closeSearcher = false;
    }
    try {
        // optionally warm and add num docs traversed to count
        if (withWarm()) {
            Document doc = null;
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int m = 0; m < reader.maxDoc(); m++) {
                // a null liveDocs means every document is treated as live
                if (null == liveDocs || liveDocs.get(m)) {
                    doc = reader.document(m);
                    res += (doc == null ? 0 : 1);
                }
            }
        }
        if (withSearch()) {
            res++;
            Query q = queryMaker.makeQuery();
            Sort sort = getSort();
            TopDocs hits = null;
            final int numHits = numHits();
            if (numHits > 0) {
                if (withCollector() == false) {
                    if (sort != null) {
                        // TODO: instead of always passing false we
                        // should detect based on the query; if we make
                        // the IndexSearcher search methods that take
                        // Weight public again, we can go back to
                        // pulling the Weight ourselves:
                        TopFieldCollector collector = TopFieldCollector.create(sort, numHits, true, withScore(), withMaxScore());
                        searcher.search(q, collector);
                        hits = collector.topDocs();
                    } else {
                        hits = searcher.search(q, numHits);
                    }
                } else {
                    // Custom collector: its results are not read back, so 'hits' stays
                    // null and the printing / traversal below is skipped.
                    searcher.search(q, createCollector());
                }
                if (hits != null) {
                    final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
                    if (printHitsField != null && printHitsField.length() > 0) {
                        System.out.println("totalHits = " + hits.totalHits);
                        System.out.println("maxDoc()  = " + reader.maxDoc());
                        System.out.println("numDocs() = " + reader.numDocs());
                        for (int i = 0; i < hits.scoreDocs.length; i++) {
                            final int docID = hits.scoreDocs[i].doc;
                            final Document doc = reader.document(docID);
                            System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
                        }
                    }
                    res += withTopDocs(searcher, q, hits);
                }
            }
        }
    } finally {
        // Always give the reader back, even when warm-up or search failed.
        if (closeSearcher) {
            reader.close();
        } else {
            // Release our +1 ref from above
            reader.decRef();
        }
    }
    return res;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Bits(org.apache.lucene.util.Bits) Sort(org.apache.lucene.search.Sort) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Directory(org.apache.lucene.store.Directory)

Aggregations

TopFieldCollector (org.apache.lucene.search.TopFieldCollector)13 TopDocs (org.apache.lucene.search.TopDocs)9 Sort (org.apache.lucene.search.Sort)7 IndexSearcher (org.apache.lucene.search.IndexSearcher)5 Document (org.apache.lucene.document.Document)4 ScoreDoc (org.apache.lucene.search.ScoreDoc)4 Directory (org.apache.lucene.store.Directory)4 ArrayList (java.util.ArrayList)3 Collector (org.apache.lucene.search.Collector)3 TopFieldDocs (org.apache.lucene.search.TopFieldDocs)3 HashSet (java.util.HashSet)2 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)2 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)2 StringField (org.apache.lucene.document.StringField)2 IndexReader (org.apache.lucene.index.IndexReader)2 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)2 EarlyTerminatingSortingCollector (org.apache.lucene.search.EarlyTerminatingSortingCollector)2 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)2 TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector)2 Bits (org.apache.lucene.util.Bits)2