Search in sources :

Example 71 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class CodeSearcher method getRepoDocuments.

/**
 * Returns one page of indexed file locations for a repository.
 *
 * Due to very large repositories (500,000 files) this needs to support
 * paging: each page holds at most 1000 entries and {@code page} is
 * zero-based (the first entry of a page is at index {@code 1000 * page}).
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName repository name; it is concatenated into the query string
 *                 as-is, so query-syntax characters in it are interpreted
 *                 by the query parser
 * @param page     zero-based page index
 * @return the {@code Values.PATH} field of each matching document on the
 *         requested page; empty when the page is past the last hit. If an
 *         exception occurs it is logged and whatever was collected so far
 *         (possibly nothing) is returned.
 */
public List<String> getRepoDocuments(String repoName, int page) {
    int REPOPAGELIMIT = 1000;
    List<String> fileLocations = new ArrayList<>(REPOPAGELIMIT);
    int start = REPOPAGELIMIT * page;
    // try-with-resources: the reader (and the directory it was opened on) must be
    // released even when parsing, searching or document loading throws.
    try (FSDirectory directory = FSDirectory.open(Paths.get(this.INDEXPATH));
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);
        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;
        // Bound by the array that is actually indexed below, not by the reported hit count.
        int end = Math.min(hits.length, REPOPAGELIMIT * (page + 1));
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page + "\n with message: " + ex.getMessage());
    }
    return fileLocations;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 72 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class CodeSearcher method getProjectStats.

/**
 * Collects summary statistics for every indexed document of a repository.
 *
 * For each matching document whose language is not excluded by
 * {@code searchcodeLib.languageCostIgnore}, the {@code Values.CODELINES}
 * field is added to the overall line total and to a per-language total
 * (language names have underscores replaced by spaces). The per-language
 * totals are returned sorted by line count, largest first. Language and
 * owner facets come from {@code getLanguageFacetResults} and
 * {@code getOwnerFacetResults}.
 *
 * @param repoName repository name; it is concatenated into the query string
 *                 as-is, so query-syntax characters in it are interpreted
 *                 by the query parser
 * @return the collected statistics. If an exception occurs it is logged and
 *         the values gathered up to that point are returned (file count and
 *         facets stay at their zero/empty defaults unless already assigned).
 */
public ProjectStats getProjectStats(String repoName) {
    int totalCodeLines = 0;
    int totalFiles = 0;
    List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>();
    List<CodeFacetOwner> repoFacetOwners = new ArrayList<>();
    List<CodeFacetLanguage> codeByLines = new ArrayList<>();
    SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib();
    // try-with-resources: the reader (and the directory it was opened on) must be
    // released even when parsing, searching or facet collection throws.
    try (FSDirectory directory = FSDirectory.open(Paths.get(this.INDEXPATH));
         IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);
        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;
        Map<String, Integer> linesCount = new HashMap<>();
        // Iterate over the array that is actually indexed, not the reported hit count.
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            String rawLanguageName = doc.get(Values.LANGUAGENAME);
            if (!searchcodeLib.languageCostIgnore(rawLanguageName)) {
                // "0" is the fallback handed to tryParseInt for unparseable values (presumably yields 0).
                int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0");
                totalCodeLines += lines;
                linesCount.merge(rawLanguageName.replace("_", " "), lines, Integer::sum);
            }
        }
        for (Map.Entry<String, Integer> entry : linesCount.entrySet()) {
            codeByLines.add(new CodeFacetLanguage(entry.getKey(), entry.getValue()));
        }
        // Descending by count; Integer.compare avoids the overflow a subtraction comparator can hit.
        codeByLines.sort((a, b) -> Integer.compare(b.getCount(), a.getCount()));
        totalFiles = results.totalHits;
        codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
        repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query);
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 73 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project janusgraph by JanusGraph.

the class LuceneIndex method query.

/**
 * Runs a raw Lucene query-string search against a store and streams the hits.
 *
 * The query string is parsed with the classic {@code QueryParser} using
 * {@code "_all"} as the default field. The Lucene query parser does not take
 * additional parameters, so any parameters on the {@code RawQuery} are ignored.
 * The offset is applied by fetching {@code limit + offset} hits (capped at
 * {@code Integer.MAX_VALUE - 1}) and skipping the first {@code offset} of them.
 *
 * @param query       raw query carrying the store name, query string, offset and optional limit
 * @param information index retriever handed to {@code delegatingAnalyzerFor}
 * @param tx          transaction; cast to {@code Transaction} to obtain the store's searcher
 * @return a stream of results holding each hit's {@code DOCID} field value
 *         (null when the field is absent) and score; empty when the store has no searcher yet
 * @throws PermanentBackendException if the query string cannot be parsed
 * @throws TemporaryBackendException if Lucene fails with an I/O error while searching
 */
@Override
public Stream<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
    final Query parsedQuery;
    try {
        final Analyzer storeAnalyzer = delegatingAnalyzerFor(query.getStore(), information);
        final QueryParser queryParser = new QueryParser("_all", storeAnalyzer);
        parsedQuery = queryParser.parse(query.getQuery());
    } catch (final ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }

    try {
        final Transaction luceneTx = (Transaction) tx;
        final IndexSearcher indexSearcher = luceneTx.getSearcher(query.getStore());
        if (indexSearcher == null) {
            // Index does not yet exist
            return Collections.<RawQuery.Result<String>>emptyList().stream();
        }

        final long startedAt = System.currentTimeMillis();
        // TODO: can we make offset more efficient in Lucene?
        final int offset = query.getOffset();
        final int ceiling = Integer.MAX_VALUE - 1;
        final int requested = query.hasLimit() ? query.getLimit() : ceiling;
        // Widen the fetch window by the offset unless that would pass the ceiling.
        final int fetchCount = (requested < ceiling - offset) ? requested + offset : ceiling;

        final TopDocs topDocs = indexSearcher.search(parsedQuery, fetchCount);
        log.debug("Executed query [{}] in {} ms", parsedQuery, System.currentTimeMillis() - startedAt);

        final ScoreDoc[] scored = topDocs.scoreDocs;
        final List<RawQuery.Result<String>> collected = new ArrayList<>(scored.length);
        int index = offset;
        while (index < scored.length) {
            final ScoreDoc hit = scored[index];
            final IndexableField idField = indexSearcher.doc(hit.doc).getField(DOCID);
            final String id = (idField != null) ? idField.stringValue() : null;
            collected.add(new RawQuery.Result<>(id, hit.score));
            index++;
        }
        return collected.stream();
    } catch (final IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
Also used : IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser)73 Query (org.apache.lucene.search.Query)50 IndexSearcher (org.apache.lucene.search.IndexSearcher)32 Document (org.apache.lucene.document.Document)26 IOException (java.io.IOException)24 Analyzer (org.apache.lucene.analysis.Analyzer)21 TopDocs (org.apache.lucene.search.TopDocs)21 IndexReader (org.apache.lucene.index.IndexReader)18 ScoreDoc (org.apache.lucene.search.ScoreDoc)18 ArrayList (java.util.ArrayList)16 ParseException (org.apache.lucene.queryparser.classic.ParseException)16 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)14 BooleanQuery (org.apache.lucene.search.BooleanQuery)14 TermQuery (org.apache.lucene.search.TermQuery)13 ScoredDocuments (io.anserini.rerank.ScoredDocuments)6 Term (org.apache.lucene.index.Term)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 WildcardQuery (org.apache.lucene.search.WildcardQuery)6 EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer)5 IndexWriter (org.apache.lucene.index.IndexWriter)5