
Example 91 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

the class DocToDoubleVectorUtilsTest method testDenseFreqDoubleArrayConversion.

@Test
public void testDenseFreqDoubleArrayConversion() throws Exception {
    IndexSearcher indexSearcher = new IndexSearcher(index);
    for (ScoreDoc scoreDoc : indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE).scoreDocs) {
        Terms docTerms = index.getTermVector(scoreDoc.doc, "text");
        Double[] vector = DocToDoubleVectorUtils.toDenseLocalFreqDoubleArray(docTerms);
        assertNotNull(vector);
        assertTrue(vector.length > 0);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Terms(org.apache.lucene.index.Terms) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) Test(org.junit.Test)
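
The assertions above hold only if the shared index reader was built from documents whose "text" field stores term vectors; otherwise getTermVector() returns null. Below is a minimal setup sketch with assumed names and content. It is not the test's actual fixture, just one way such an index could be created.

// Sketch only (not from lucene-solr): build a reader whose "text" field exposes term vectors.
static IndexReader buildIndexWithTermVectors(Directory dir) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        FieldType withVectors = new FieldType(TextField.TYPE_NOT_STORED);
        // Without this flag, getTermVector(doc, "text") returns null and the assertions above fail.
        withVectors.setStoreTermVectors(true);
        withVectors.freeze();
        Document doc = new Document();
        doc.add(new Field("text", "the quick brown fox jumps over the lazy dog", withVectors));
        writer.addDocument(doc);
    }
    return DirectoryReader.open(dir);
}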

Example 92 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

the class TestIndexingSequenceNumbers method testStressConcurrentAddAndDeleteAndCommit.

@Slow
public void testStressConcurrentAddAndDeleteAndCommit() throws Exception {
    final int opCount = atLeast(10000);
    final int idCount = TestUtil.nextInt(random(), 10, 1000);
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    // Cannot use RIW since it randomly commits:
    final IndexWriter w = new IndexWriter(dir, iwc);
    final int numThreads = TestUtil.nextInt(random(), 2, 5);
    Thread[] threads = new Thread[numThreads];
    //System.out.println("TEST: iter=" + iter + " opCount=" + opCount + " idCount=" + idCount + " threadCount=" + threads.length);
    final CountDownLatch startingGun = new CountDownLatch(1);
    List<List<Operation>> threadOps = new ArrayList<>();
    Object commitLock = new Object();
    final List<Operation> commits = new ArrayList<>();
    // multiple threads update the same set of documents, and we randomly commit
    for (int i = 0; i < threads.length; i++) {
        final List<Operation> ops = new ArrayList<>();
        threadOps.add(ops);
        final int threadID = i;
        threads[i] = new Thread() {

            @Override
            public void run() {
                try {
                    startingGun.await();
                    for (int i = 0; i < opCount; i++) {
                        Operation op = new Operation();
                        op.threadID = threadID;
                        if (random().nextInt(500) == 17) {
                            op.what = 2;
                            synchronized (commitLock) {
                                op.seqNo = w.commit();
                                if (op.seqNo != -1) {
                                    commits.add(op);
                                }
                            }
                        } else {
                            op.id = random().nextInt(idCount);
                            Term idTerm = new Term("id", "" + op.id);
                            if (random().nextInt(10) == 1) {
                                op.what = 1;
                                if (random().nextBoolean()) {
                                    op.seqNo = w.deleteDocuments(idTerm);
                                } else {
                                    op.seqNo = w.deleteDocuments(new TermQuery(idTerm));
                                }
                            } else {
                                Document doc = new Document();
                                doc.add(new StoredField("threadop", threadID + "-" + ops.size()));
                                doc.add(new StringField("id", "" + op.id, Field.Store.NO));
                                if (random().nextBoolean()) {
                                    List<Document> docs = new ArrayList<>();
                                    docs.add(doc);
                                    op.seqNo = w.addDocuments(docs);
                                } else {
                                    op.seqNo = w.addDocument(doc);
                                }
                                op.what = 3;
                            }
                            ops.add(op);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        threads[i].setName("thread" + threadID);
        threads[i].start();
    }
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    Operation commitOp = new Operation();
    commitOp.seqNo = w.commit();
    if (commitOp.seqNo != -1) {
        commits.add(commitOp);
    }
    List<IndexCommit> indexCommits = DirectoryReader.listCommits(dir);
    assertEquals(commits.size(), indexCommits.size());
    // how many docs with this id are expected:
    int[] expectedCounts = new int[idCount];
    long[] lastDelSeqNos = new long[idCount];
    //System.out.println("TEST: " + commits.size() + " commits");
    for (int i = 0; i < commits.size(); i++) {
        // this commit point should reflect all operations <= this seqNo
        long commitSeqNo = commits.get(i).seqNo;
        //System.out.println("  commit " + i + ": seqNo=" + commitSeqNo + " segs=" + indexCommits.get(i));
        // first find the highest seqNo of the last delete op, for each id, prior to this commit:
        Arrays.fill(lastDelSeqNos, -1);
        for (int threadID = 0; threadID < threadOps.size(); threadID++) {
            long lastSeqNo = 0;
            for (Operation op : threadOps.get(threadID)) {
                if (op.what == 1 && op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id]) {
                    lastDelSeqNos[op.id] = op.seqNo;
                }
                // within one thread the seqNos must only increase:
                assertTrue(op.seqNo > lastSeqNo);
                lastSeqNo = op.seqNo;
            }
        }
        // then count how many adds happened since the last delete and before this commit:
        Arrays.fill(expectedCounts, 0);
        for (int threadID = 0; threadID < threadOps.size(); threadID++) {
            for (Operation op : threadOps.get(threadID)) {
                if (op.what == 3 && op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id]) {
                    expectedCounts[op.id]++;
                }
            }
        }
        DirectoryReader r = DirectoryReader.open(indexCommits.get(i));
        IndexSearcher s = new IndexSearcher(r);
        for (int id = 0; id < idCount; id++) {
            //System.out.println("TEST: check id=" + id + " expectedThreadID=" + expectedThreadIDs[id]);
            int actualCount = s.count(new TermQuery(new Term("id", "" + id)));
            if (expectedCounts[id] != actualCount) {
                System.out.println("TEST: FAIL r=" + r + " id=" + id + " commitSeqNo=" + commitSeqNo);
                for (int threadID = 0; threadID < threadOps.size(); threadID++) {
                    int opCount2 = 0;
                    for (Operation op : threadOps.get(threadID)) {
                        if (op.id == id) {
                            boolean shouldCount = op.seqNo <= commitSeqNo && op.seqNo > lastDelSeqNos[op.id];
                            System.out.println("  id=" + id + " what=" + op.what + " threadop=" + threadID + "-" + opCount2 + " seqNo=" + op.seqNo + " vs lastDelSeqNo=" + lastDelSeqNos[op.id] + " shouldCount=" + shouldCount);
                        }
                        opCount2++;
                    }
                }
                TopDocs hits = s.search(new TermQuery(new Term("id", "" + id)), 1 + actualCount);
                for (ScoreDoc hit : hits.scoreDocs) {
                    System.out.println("  hit: " + s.doc(hit.doc).get("threadop"));
                }
                for (LeafReaderContext ctx : r.leaves()) {
                    System.out.println("  sub=" + ctx.reader());
                    Bits liveDocs = ctx.reader().getLiveDocs();
                    for (int docID = 0; docID < ctx.reader().maxDoc(); docID++) {
                        System.out.println("    docID=" + docID + " threadop=" + ctx.reader().document(docID).get("threadop") + (liveDocs != null && liveDocs.get(docID) == false ? " (deleted)" : ""));
                    }
                }
                assertEquals("commit " + i + " of " + commits.size() + " id=" + id + " reader=" + r, expectedCounts[id], actualCount);
            }
        }
        w.close();
        r.close();
    }
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) List(java.util.List) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) CountDownLatch(java.util.concurrent.CountDownLatch) StringField(org.apache.lucene.document.StringField) Bits(org.apache.lucene.util.Bits)
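
The bookkeeping in this test relies on the sequence-number contract IndexWriter has exposed since Lucene 6: every add, update, delete, and commit returns a monotonically increasing long sequence number, and commit() returns the sequence number of the last operation reflected in that commit point (or -1 when there was nothing to commit). A simplified, single-threaded sketch of that contract, not taken from the test:

// Single-threaded sketch of the contract the stress test exercises (illustrative only).
Directory dir = new RAMDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));

long addSeqNo = w.addDocument(doc);                        // e.g. 1
long deleteSeqNo = w.deleteDocuments(new Term("id", "1")); // e.g. 2, always greater than addSeqNo
long commitSeqNo = w.commit();                             // e.g. 3, or -1 if nothing was pending

// The same rule the test applies per id: an add is visible in a commit point iff it
// happened at or before the commit and after the last delete of that id.
boolean addVisibleInCommit = addSeqNo <= commitSeqNo && addSeqNo > deleteSeqNo; // false here

w.close();
dir.close();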

Example 93 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.

the class TweetSearcherAPI method search.

@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
    try {
        Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());
        try {
            reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        int topN = query.getCount();
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<SearchResult> resultHits = new ArrayList<>();
        for (int i = 0; i < hits.length && i < topN; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
        }
        return resultHits;
    } catch (Exception e) {
        e.printStackTrace();
        return new ArrayList<>();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces)
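
Two small notes on the loop above: hits.length can never exceed topN because the collector was created with topN, so the extra "i < topN" guard is redundant; and on the Lucene versions this compiles against, the explicit collector can be replaced by the searcher.search(Query, int) convenience overload. A hedged sketch of that simplification, reusing the project classes from the example rather than Anserini's actual code:

// Equivalent hit collection without a manual TopScoreDocCollector (illustrative sketch).
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs topDocs = searcher.search(q, topN);             // returns at most topN ScoreDocs

List<SearchResult> resultHits = new ArrayList<>();
for (ScoreDoc hit : topDocs.scoreDocs) {
    Document d = searcher.doc(hit.doc);                 // load the stored fields of the hit
    resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
}
return resultHits;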

Example 94 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.

the class TweetServlet method doGet.

@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_OK);
        response.setContentType("text/html");
        request.setCharacterEncoding("UTF-8");
        Query q;
        try {
            q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(request.getParameter("query"));
            try {
                reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
            IndexSearcher searcher = new IndexSearcher(reader);
            int topN;
            if (request.getParameter("top") != null) {
                topN = Integer.parseInt(request.getParameter("top"));
            } else {
                // TODO configurable, default(parameter unspecified in url) topN = 20
                topN = 20;
            }
            TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            TweetHits tweetHits = new TweetHits(request.getParameter("query"), hits.length);
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
            }
            MustacheFactory mf = new DefaultMustacheFactory();
            Mustache mustache = mf.compile(MustacheTemplatePath);
            mustache.execute(response.getWriter(), tweetHits).flush();
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    } else {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) Mustache(com.github.mustachejava.Mustache) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) MustacheFactory(com.github.mustachejava.MustacheFactory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)
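
Both Anserini examples reopen the reader by hand with DirectoryReader.open/openIfChanged against the shared IndexWriter. Lucene ships SearcherManager for exactly this near-real-time pattern; the sketch below shows how it could look here, assuming TweetSearcher.indexWriter is the live writer. This is an alternative sketch, not the project's code.

// Created once, e.g. at servlet init (illustrative sketch, not Anserini's implementation).
SearcherManager searcherManager = new SearcherManager(TweetSearcher.indexWriter, new SearcherFactory());

// Per request:
searcherManager.maybeRefresh();                      // pick up any changes from the writer
IndexSearcher searcher = searcherManager.acquire();  // ref-counted searcher over the latest reader
try {
    TopDocs hits = searcher.search(q, topN);
    // ... render hits ...
} finally {
    searcherManager.release(searcher);               // release instead of closing the reader
}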

Example 95 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project ddf by codice.

the class GeoNamesQueryLuceneIndex method doGetNearestCities.

protected List<NearbyLocation> doGetNearestCities(final Shape shape, final int radiusInKm, final int maxResults, final Directory directory) throws GeoEntryQueryException {
    notNull(shape, "GeoNamesQueryLuceneIndex.doGetNearestCities(): argument 'shape' may not be null.");
    if (radiusInKm <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): radiusInKm must be positive.");
    }
    if (maxResults <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): maxResults must be positive.");
    }
    if (directory == null) {
        return Collections.emptyList();
    }
    try (final IndexReader indexReader = createIndexReader(directory)) {
        final IndexSearcher indexSearcher = createIndexSearcher(indexReader);
        final List<NearbyLocation> closestCities = new ArrayList<>();
        final Point center = shape.getCenter();
        final Query filter = createSpatialQuery(center, radiusInKm);
        // Query for all the documents in the index that are cities, then filter those
        // results for the ones that are in the search area.
        final BooleanQuery booleanQuery = new BooleanQuery.Builder().add(PPL_QUERY, BooleanClause.Occur.MUST).add(filter, BooleanClause.Occur.FILTER).build();
        final TopDocs topDocs = indexSearcher.search(booleanQuery, maxResults, SORT);
        if (topDocs.totalHits > 0) {
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                final double lat = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LATITUDE_FIELD));
                final double lon = Double.parseDouble(indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.LONGITUDE_FIELD));
                final String name = indexSearcher.doc(scoreDoc.doc).get(GeoNamesLuceneConstants.NAME_FIELD);
                final NearbyLocation city = new NearbyLocationImpl(center, new PointImpl(lon, lat, SPATIAL_CONTEXT), name);
                closestCities.add(city);
            }
        }
        return closestCities;
    } catch (IOException e) {
        throw new GeoEntryQueryException("Error reading the index", e);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NearbyLocationImpl(org.codice.ddf.spatial.geocoding.context.impl.NearbyLocationImpl) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) FunctionQuery(org.apache.lucene.queries.function.FunctionQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BoostQuery(org.apache.lucene.search.BoostQuery) GeoEntryQueryException(org.codice.ddf.spatial.geocoding.GeoEntryQueryException) ArrayList(java.util.ArrayList) Point(org.locationtech.spatial4j.shape.Point) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) NearbyLocation(org.codice.ddf.spatial.geocoding.context.NearbyLocation) PointImpl(org.locationtech.spatial4j.shape.impl.PointImpl)
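
createSpatialQuery() is a helper of GeoNamesQueryLuceneIndex that is not shown in this snippet. For readers who only want the shape of such a radius filter, one way it could be expressed with core Lucene geo support is LatLonPoint.newDistanceQuery. This is purely illustrative, not necessarily how ddf builds its spatial query, and the "location" field name is hypothetical.

// Illustrative only, NOT ddf's createSpatialQuery(). Assumes documents were indexed
// with a LatLonPoint field named "location".
Query distanceFilter = LatLonPoint.newDistanceQuery(
        "location",            // hypothetical indexed LatLonPoint field
        center.getY(),         // latitude  (spatial4j points are x = lon, y = lat)
        center.getX(),         // longitude
        radiusInKm * 1000.0);  // LatLonPoint distances are expressed in meters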

Aggregations

ScoreDoc (org.apache.lucene.search.ScoreDoc): 222 usages
TopDocs (org.apache.lucene.search.TopDocs): 124 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 98 usages
Document (org.apache.lucene.document.Document): 95 usages
Query (org.apache.lucene.search.Query): 71 usages
TermQuery (org.apache.lucene.search.TermQuery): 52 usages
IOException (java.io.IOException): 48 usages
ArrayList (java.util.ArrayList): 46 usages
IndexReader (org.apache.lucene.index.IndexReader): 45 usages
Term (org.apache.lucene.index.Term): 39 usages
Directory (org.apache.lucene.store.Directory): 37 usages
BooleanQuery (org.apache.lucene.search.BooleanQuery): 27 usages
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 26 usages
Test (org.junit.Test): 23 usages
Sort (org.apache.lucene.search.Sort): 22 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 21 usages
HashMap (java.util.HashMap): 20 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 20 usages
FieldDoc (org.apache.lucene.search.FieldDoc): 20 usages
QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 18 usages