Search in sources:

Example 96 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project searchcode-server by boyter.

the class CodeSearcher method getByCodeId.

/**
 * Looks up a single indexed document by its code id and builds a CodeResult from it.
 *
 * Only used as a fallback if getByRepoFileName fails for some reason due to what appears to be
 * a lucene index bug. This should always work as the path used is sha1 and should be unique for
 * anything the current codebase can deal with.
 *
 * @param codeId the code id to look up; it is escaped before being parsed as a query
 * @return the result built from the first hit, or {@code null} when there is no hit or the
 *         lookup fails (failures are logged, not rethrown)
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;
    // try-with-resources guarantees the reader (and its directory) are released even when
    // parsing, searching or document loading throws.
    try (FSDirectory directory = FSDirectory.open(Paths.get(this.INDEXPATH));
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // Escape once and reuse for both the query and the log line.
        String queryString = Values.CODEID + ":" + QueryParser.escape(codeId);
        Query query = parser.parse(queryString);
        Singleton.getLogger().info("Query to get by " + queryString);
        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;
        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);
            List<String> code = new ArrayList<>();
            try {
                int maxLineDepth = Singleton.getHelpers().tryParseInt(Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH);
                code = Singleton.getHelpers().readFileLinesGuessEncoding(filepath, maxLineDepth);
            } catch (Exception ex) {
                // Best effort: a file that cannot be read still yields a result with no code lines.
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }
            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return codeResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 97 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.

the class TweetSearcherAPI method search.

/**
 * Parses the text of the given API query, searches the tweet index with it and returns one
 * SearchResult per hit, built from the stored status id.
 *
 * The shared reader is opened on first use and afterwards only refreshed when the index writer
 * has changes, so a new reader is no longer leaked on every request.
 * NOTE(review): {@code reader} is mutable state declared outside this method; concurrent
 * requests are not synchronized here (nor were they before) — confirm this is acceptable.
 *
 * @param query the request payload carrying the query string and the number of hits wanted
 * @return the hits in collector order, or an empty list if anything fails
 */
@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
    try {
        Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());
        if (reader == null) {
            // First request: open a near-real-time reader on the writer.
            reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        } else {
            // Later requests: reuse the reader unless the writer has new changes.
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        int topN = query.getCount();
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);
        // The collector was created with topN, so no extra bound is needed when iterating.
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<SearchResult> resultHits = new ArrayList<>();
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
        }
        return resultHits;
    } catch (Exception e) {
        // API boundary: report the failure and answer with an empty result instead of an error.
        e.printStackTrace();
        return new ArrayList<>();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces)

Example 98 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.

the class TweetServlet method doGet.

/**
 * Handles {@code GET /search}: parses the {@code query} parameter, searches the tweet index for
 * the top hits and renders them through the Mustache template into the response.
 *
 * Responses: 404 for any other URI; 400 when {@code query} is missing or cannot be parsed, or
 * when {@code top} is not a positive integer; otherwise 200 with the rendered HTML.
 *
 * The shared reader is opened on first use and afterwards only refreshed when the index writer
 * has changes, so a new reader is no longer leaked on every request.
 * NOTE(review): {@code reader} is mutable state declared outside this method; concurrent
 * requests are not synchronized here (nor were they before) — confirm this is acceptable.
 *
 * @param request  the incoming request; reads the {@code query} and optional {@code top} parameters
 * @param response the response to write the status and rendered hits to
 * @throws ServletException declared by the overridden method
 * @throws IOException if the index cannot be read or the response cannot be written
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }
    // Must be set before any parameter is read.
    request.setCharacterEncoding("UTF-8");
    String queryString = request.getParameter("query");
    if (queryString == null) {
        response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
        return;
    }
    // TODO configurable, default(parameter unspecified in url) topN = 20
    int topN = 20;
    String topParameter = request.getParameter("top");
    if (topParameter != null) {
        try {
            topN = Integer.parseInt(topParameter);
        } catch (NumberFormatException e) {
            response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
    }
    if (topN <= 0) {
        // The collector rejects a non-positive hit count; answer as a client error instead.
        response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
        return;
    }
    Query q;
    try {
        q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(queryString);
    } catch (ParseException e) {
        e.printStackTrace();
        response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
        return;
    }
    response.setStatus(HttpServletResponse.SC_OK);
    response.setContentType("text/html");
    if (reader == null) {
        // First request: open a near-real-time reader on the writer.
        reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
    } else {
        // Later requests: reuse the reader unless the writer has new changes.
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
    }
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    TweetHits tweetHits = new TweetHits(queryString, hits.length);
    for (int i = 0; i < hits.length; ++i) {
        Document d = searcher.doc(hits[i].doc);
        tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
    }
    MustacheFactory mf = new DefaultMustacheFactory();
    Mustache mustache = mf.compile(MustacheTemplatePath);
    mustache.execute(response.getWriter(), tweetHits).flush();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) Mustache(com.github.mustachejava.Mustache) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) MustacheFactory(com.github.mustachejava.MustacheFactory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 99 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project Anserini by castorini.

the class BaseFeatureExtractor method printFeatures.

/**
 * Prints feature vectors with respect to the qrels, one vector per qrel.
 *
 * For every (query id, document id) pair in the qrels the document is looked up in the index;
 * pairs whose document is missing from the index or has no term vector are skipped.
 *
 * @param out the stream the header and the feature vectors are written to; flushed after each query
 * @throws IOException if searching or reading from the index fails
 */
public void printFeatures(PrintStream out) throws IOException {
    Map<String, RerankerContext> queryContextMap = buildRerankerContextMap();
    FeatureExtractors extractors = getExtractors();
    Set<String> fieldsToLoad = getFieldsToLoad();
    // We need to open a searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    this.printHeader(out, extractors);
    // Iterate through all the qrels and for each document id we have for them
    LOG.debug("Processing queries");
    for (String qid : this.qrels.getQids()) {
        LOG.debug(String.format("Processing qid: %s", qid));
        // Get the map of documents
        RerankerContext context = queryContextMap.get(qid);
        for (Map.Entry<String, Integer> entry : this.qrels.getDocMap(qid).entrySet()) {
            String docId = entry.getKey();
            int qrelScore = entry.getValue();
            // We issue a specific query
            TopDocs topDocs = searcher.search(docIdQuery(docId), 1);
            // Guard on the array that is indexed just below.
            if (topDocs.scoreDocs.length == 0) {
                LOG.warn(String.format("Document Id %s expected but not found in index, skipping...", docId));
                continue;
            }
            ScoreDoc hit = topDocs.scoreDocs[0];
            Document doc = reader.document(hit.doc, fieldsToLoad);
            //TODO factor for test
            Terms terms = reader.getTermVector(hit.doc, getTermVectorField());
            if (terms == null) {
                LOG.debug(String.format("No term vectors found for doc %s, qid %s", docId, qid));
                continue;
            }
            float[] featureValues = extractors.extractAll(doc, terms, context);
            writeFeatureVector(out, qid, qrelScore, docId, featureValues);
        }
        LOG.debug(String.format("Finished processing for qid: %s", qid));
        out.flush();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Terms(org.apache.lucene.index.Terms) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) FeatureExtractors(io.anserini.ltr.feature.FeatureExtractors) Bits(org.apache.lucene.util.Bits) RerankerContext(io.anserini.rerank.RerankerContext)

Example 100 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project ddf by codice.

the class GeoNamesQueryLuceneIndex method doGetNearestCities.

/**
 * Finds the cities in the index that lie within {@code radiusInKm} of the centre of
 * {@code shape}, returning at most {@code maxResults} of them in the order produced by SORT.
 *
 * @param shape      the shape whose centre is the search origin; must not be null
 * @param radiusInKm the search radius in kilometres; must be positive
 * @param maxResults the maximum number of cities to return; must be positive
 * @param directory  the index to search; when null an empty list is returned
 * @return the matching cities, possibly empty
 * @throws IllegalArgumentException if {@code radiusInKm} or {@code maxResults} is not positive
 * @throws GeoEntryQueryException if the index cannot be read
 */
protected List<NearbyLocation> doGetNearestCities(final Shape shape, final int radiusInKm, final int maxResults, final Directory directory) throws GeoEntryQueryException {
    notNull(shape, "GeoNamesQueryLuceneIndex.doGetNearestCities(): argument 'shape' may not be null.");
    if (radiusInKm <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): radiusInKm must be positive.");
    }
    if (maxResults <= 0) {
        throw new IllegalArgumentException("GeoNamesQueryLuceneIndex.doGetNearestCities(): maxResults must be positive.");
    }
    if (directory == null) {
        return Collections.emptyList();
    }
    try (final IndexReader indexReader = createIndexReader(directory)) {
        final IndexSearcher indexSearcher = createIndexSearcher(indexReader);
        final List<NearbyLocation> closestCities = new ArrayList<>();
        final Point center = shape.getCenter();
        final Query filter = createSpatialQuery(center, radiusInKm);
        // Query for all the documents in the index that are cities, then filter those
        // results for the ones that are in the search area.
        final BooleanQuery booleanQuery = new BooleanQuery.Builder().add(PPL_QUERY, BooleanClause.Occur.MUST).add(filter, BooleanClause.Occur.FILTER).build();
        final TopDocs topDocs = indexSearcher.search(booleanQuery, maxResults, SORT);
        // Iterating an empty scoreDocs array is a no-op, so no separate hit-count check is needed.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Load the stored document once per hit rather than once per field.
            // Fully qualified so that this method does not depend on an extra import.
            final org.apache.lucene.document.Document document = indexSearcher.doc(scoreDoc.doc);
            final double lat = Double.parseDouble(document.get(GeoNamesLuceneConstants.LATITUDE_FIELD));
            final double lon = Double.parseDouble(document.get(GeoNamesLuceneConstants.LONGITUDE_FIELD));
            final String name = document.get(GeoNamesLuceneConstants.NAME_FIELD);
            final NearbyLocation city = new NearbyLocationImpl(center, new PointImpl(lon, lat, SPATIAL_CONTEXT), name);
            closestCities.add(city);
        }
        return closestCities;
    } catch (IOException e) {
        throw new GeoEntryQueryException("Error reading the index", e);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NearbyLocationImpl(org.codice.ddf.spatial.geocoding.context.impl.NearbyLocationImpl) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) FunctionQuery(org.apache.lucene.queries.function.FunctionQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BoostQuery(org.apache.lucene.search.BoostQuery) GeoEntryQueryException(org.codice.ddf.spatial.geocoding.GeoEntryQueryException) ArrayList(java.util.ArrayList) Point(org.locationtech.spatial4j.shape.Point) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) NearbyLocation(org.codice.ddf.spatial.geocoding.context.NearbyLocation) PointImpl(org.locationtech.spatial4j.shape.impl.PointImpl)

Aggregations

ScoreDoc (org.apache.lucene.search.ScoreDoc)211 TopDocs (org.apache.lucene.search.TopDocs)119 IndexSearcher (org.apache.lucene.search.IndexSearcher)94 Document (org.apache.lucene.document.Document)89 Query (org.apache.lucene.search.Query)65 TermQuery (org.apache.lucene.search.TermQuery)49 ArrayList (java.util.ArrayList)46 IOException (java.io.IOException)44 IndexReader (org.apache.lucene.index.IndexReader)42 Term (org.apache.lucene.index.Term)38 Directory (org.apache.lucene.store.Directory)37 BooleanQuery (org.apache.lucene.search.BooleanQuery)26 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)23 Sort (org.apache.lucene.search.Sort)22 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)21 Test (org.junit.Test)21 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)20 FieldDoc (org.apache.lucene.search.FieldDoc)20 HashMap (java.util.HashMap)18 HashSet (java.util.HashSet)17