Search in sources :

Example 21 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project cogcomp-nlp by CogComp.

the class Lucene method getLuceneDocId.

/**
 * Looks up the internal Lucene document number of the document whose
 * {@code docIdField} contains exactly the term {@code docId}.
 *
 * <p>The lookup is an exact {@link TermQuery}; the id is not parsed or analyzed.
 * Only the top hit is requested, so if several documents share the id an
 * arbitrary best-scoring one is returned.
 *
 * @param reader     index reader to search
 * @param docIdField name of the field holding the external document id
 * @param docId      external document id to match
 * @return the Lucene document number of the matching document; the not-found
 *         branch calls {@code System.exit(0)} before its {@code return -1}
 * @throws IOException if the underlying index search fails
 */
public static int getLuceneDocId(IndexReader reader, String docIdField, String docId) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    // Exact single-term match: no QueryParser is needed for this lookup.
    Query q = new TermQuery(new Term(docIdField, docId));
    ScoreDoc[] docs = searcher.search(q, 1).scoreDocs;
    if (docs.length == 0) {
        logger.error("Document with docId : " + docId + "  not found!");
        // NOTE(review): this terminates the JVM with a success status from inside a
        // utility method. Kept for backward compatibility; consider returning -1
        // (or throwing) once callers are confirmed to handle a missing document.
        System.exit(0);
        return -1;
    }
    // Lucene DocId
    return docs[0].doc;
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser)

Example 22 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class TimeCodeSearcher method getByRepoFileName.

/**
 * Attempts to find a unique file given the repository name and the path/filename.
 *
 * <p>Known issue: the lookup seems to randomly fail to find some files. The root cause
 * is unknown; a workaround exists where the file is fetched via getById, which is not
 * ideal. The bug appears to be due to some issue inside Lucene itself, as using raw
 * queries to pull back the file results in no matches, and yet it does appear when
 * not limiting to the repo.
 *
 * <p>TODO investigate the lucene issue that occurs here mentioned above
 * <br>TODO needs to use the revision number here as well to get the right value
 *
 * @param repo     repository name; joined with {@code fileName} using "/" to form the lookup key
 * @param fileName path/filename inside the repository
 * @return the populated result for the first hit, or {@code null} when there is no hit
 *         or any exception occurs (exceptions are logged, not propagated)
 */
public CodeResult getByRepoFileName(String repo, String fileName) {
    CodeResult codeResult = null;
    // try-with-resources guarantees the reader is closed even when parsing,
    // searching or result building throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // TODO I have a feeling this may not be unique if there are two files in the same directory with different case... something to investigate
        String queryString = Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName);
        Query query = parser.parse(queryString);
        Singleton.getLogger().info("Query to get by filename = " + queryString);
        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;
        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);
            List<String> code = new ArrayList<>();
            try {
                // NOTE(review): the result of this read is immediately overwritten below.
                // Presumably it is kept so that content that is not valid UTF-8 throws
                // and is treated as binary - confirm before removing.
                code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
                code = Singleton.getHelpers().readFileLines(filepath, Singleton.getHelpers().tryParseInt(Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                // Best effort: any read failure is logged and the code lines are left as
                // whatever was assigned before the failure.
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }
            codeResult = new CodeResult(code, null);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return codeResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ArrayList(java.util.ArrayList) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 23 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class TimeCodeSearcher method getRepoDocuments.

/**
 * Returns the stored file-location value of every indexed document that matches
 * the given repository name.
 *
 * @param repoName repository name to match against the repo-name field; it is escaped
 *                 so query-syntax characters in the name are treated literally
 * @return the file locations of all hits; empty when nothing matches. If an exception
 *         occurs it is logged and the locations collected so far are returned.
 */
public List<String> getRepoDocuments(String repoName) {
    List<String> fileLocations = new ArrayList<>();
    // try-with-resources guarantees the reader is closed even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // Escape the name so characters such as '/', ':' or spaces cannot alter the query
        // structure (same treatment as the repo/filename lookup elsewhere in this class).
        Query query = parser.parse(Values.REPONAME + ":" + QueryParser.escape(repoName));
        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            fileLocations.add(doc.get(Values.FILELOCATIONFILENAME));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return fileLocations;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ArrayList(java.util.ArrayList) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 24 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class TimeCodeSearcher method search.

/**
 * Given a query and what page of results we are on, return the matching results for that search.
 *
 * <p>Increments the search counter on every call, including calls that fail.
 *
 * @param queryString query text, parsed with the classic query parser against the code field
 * @param page        page of results to return; passed through to the paging search
 * @return the results for the requested page, or an empty {@code SearchResult} when
 *         opening the index, parsing or searching fails (the failure is logged, not propagated)
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();
    // try-with-resources guarantees the reader is closed even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));
        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return searchResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) IndexReader(org.apache.lucene.index.IndexReader) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException)

Example 25 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class CodeSearcher method search.

/**
 * Given a query and what page of results we are on, return the matching results for that search.
 *
 * <p>Increments the search counter on every call, including calls that fail, and writes
 * the parsed query together with the page number to the search log.
 *
 * @param queryString query text, parsed with the classic query parser against the code field
 * @param page        page of results to return; passed through to the paging search
 * @return the results for the requested page, or an empty {@code SearchResult} when
 *         opening the index, parsing or searching fails (the failure is logged, not propagated)
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();
    // try-with-resources guarantees the reader is closed even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);
        String parsedQuery = query.toString(CODEFIELD);
        LOGGER.info("Searching for: " + parsedQuery);
        LOGGER.searchLog(parsedQuery + " " + page);
        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return searchResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) IndexReader(org.apache.lucene.index.IndexReader) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException)

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser)67 Query (org.apache.lucene.search.Query)46 IndexSearcher (org.apache.lucene.search.IndexSearcher)30 Document (org.apache.lucene.document.Document)25 IOException (java.io.IOException)19 Analyzer (org.apache.lucene.analysis.Analyzer)19 IndexReader (org.apache.lucene.index.IndexReader)18 TopDocs (org.apache.lucene.search.TopDocs)18 ScoreDoc (org.apache.lucene.search.ScoreDoc)17 ArrayList (java.util.ArrayList)14 BooleanQuery (org.apache.lucene.search.BooleanQuery)14 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)13 ParseException (org.apache.lucene.queryparser.classic.ParseException)12 TermQuery (org.apache.lucene.search.TermQuery)11 Term (org.apache.lucene.index.Term)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 WildcardQuery (org.apache.lucene.search.WildcardQuery)6 EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer)5 IndexWriter (org.apache.lucene.index.IndexWriter)5 ScoredDocuments (io.anserini.rerank.ScoredDocuments)4