Search in sources :

Example 31 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class CodeSearcher method search.

/**
 * Given a query and what page of results we are on, return the matching results for that search.
 *
 * <p>The search counter is incremented on every call, before the index is opened. Any failure
 * (index cannot be opened, query cannot be parsed, search fails) is logged as a warning and the
 * default-constructed {@code SearchResult} is returned instead of propagating the exception.
 *
 * @param queryString query text, parsed with the classic Lucene {@code QueryParser} against {@code CODEFIELD}
 * @param page        page of results requested; passed through to {@code doPagingSearch}
 * @return the result of {@code doPagingSearch}, or a default-constructed {@code SearchResult} on failure
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();
    // try-with-resources: the reader must be released even when parsing or searching throws,
    // otherwise every failed query leaks the reader it opened.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));
        LOGGER.searchLog(query.toString(CODEFIELD) + " " + page);
        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return searchResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) IndexReader(org.apache.lucene.index.IndexReader) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException)

Example 32 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.

the class CodeSearcher method getByCodeId.

/**
 * Looks up a single indexed document by its code id.
 *
 * <p>Only used as a fallback if getByRepoFileName fails for some reason due to what appears to be
 * a Lucene index bug. This should always work, as the path used is sha1 and should be unique for
 * anything the current codebase can deal with.
 *
 * <p>If the file referenced by the document cannot be read, the result is still returned with an
 * empty list of code lines. Any other failure is logged as severe and {@code null} is returned.
 *
 * @param codeId the code id to look up; it is escaped before being placed in the query
 * @return the populated {@code CodeResult} for the first hit, or {@code null} when nothing matched
 *         or an error occurred
 */
public CodeResult getByCodeId(String codeId) {
    CodeResult codeResult = null;
    // try-with-resources: close the reader on every path, including parse/search failures.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // Built once so the logged text is exactly the text that gets parsed.
        String queryString = Values.CODEID + ":" + QueryParser.escape(codeId);
        Query query = parser.parse(queryString);
        Singleton.getLogger().info("Query to get by " + queryString);
        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;
        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);
            List<String> code = new ArrayList<>();
            try {
                code = Singleton.getHelpers().readFileLinesGuessEncoding(filepath, Singleton.getHelpers().tryParseInt(Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                // Best effort: an unreadable file still yields a result, just without code lines.
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }
            codeResult = new CodeResult(code, null);
            codeResult.setFilePath(filepath);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
            codeResult.setCodeId(doc.get(Values.CODEID));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return codeResult;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader)

Example 33 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

the class EntityLinking method exactQuerySearch.

/**
 * Returns a list of query results for a phrase search on the entity name.
 *
 * <p>The name is searched as a quoted phrase (phrase slop 3) against
 * {@code IndexTopics.FIELD_NAME} using a {@code SimpleAnalyzer}.
 *
 * @param queryName the entity name to search; query-syntax characters in it are escaped
 * @param numHits the maximum number of hits requested from the searcher
 * @throws Exception on error
 * @return a list of top ranked entities
 */
public List<RankedEntity> exactQuerySearch(String queryName, int numHits) throws Exception {
    List<RankedEntity> rankedEntities = new ArrayList<>();
    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // do exact search on query name
    QueryParser queryParser = new QueryParser(IndexTopics.FIELD_NAME, new SimpleAnalyzer());
    queryParser.setAutoGeneratePhraseQueries(true);
    queryParser.setPhraseSlop(3);
    // Escape before quoting: an unescaped '"' or '\' inside the name would terminate or corrupt
    // the phrase and make parse() throw instead of searching for the literal name.
    String phrase = "\"" + QueryParser.escape(queryName) + "\"";
    Query query = queryParser.parse(phrase);
    TopDocs rs = searcher.search(query, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }
    return rankedEntities;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) ArrayList(java.util.ArrayList) ScoredDocuments(io.anserini.rerank.ScoredDocuments)

Example 34 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

the class IdfPassageScorer method getTermIdfJSON.

/**
 * Computes an IDF value for each whitespace-separated token of the given sentences and returns
 * the accumulated term-to-IDF map as JSON.
 *
 * <p>Each token is parsed with an {@code EnglishAnalyzer} (empty stop set) against
 * {@code LuceneDocumentGenerator.FIELD_BODY}. Tokens that do not parse to a single
 * {@code TermQuery}, or whose parsing or lookup fails, are skipped. Results are written into
 * {@code termIdfMap}, keyed by the original token with the IDF stored as a string.
 *
 * @param sentList sentences whose tokens should be scored
 * @return a {@code JSONObject} built from {@code termIdfMap}
 */
@Override
public JSONObject getTermIdfJSON(List<String> sentList) {
    EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
    ClassicSimilarity similarity = new ClassicSimilarity();
    for (String sent : sentList) {
        for (String term : sent.trim().split("\\s+")) {
            try {
                // Check the parsed type explicitly rather than relying on a swallowed
                // ClassCastException. Held as Object so the block needs no additional import.
                Object parsed = qp.parse(term);
                if (!(parsed instanceof TermQuery)) {
                    continue;
                }
                Term t = ((TermQuery) parsed).getTerm();
                double termIDF = similarity.idf(reader.docFreq(t), reader.numDocs());
                termIdfMap.put(term, String.valueOf(termIDF));
            } catch (Exception ignored) {
                // Deliberately best effort: a token that cannot be parsed or looked up
                // simply gets no entry in the map.
            }
        }
    }
    return new JSONObject(termIdfMap);
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TermQuery(org.apache.lucene.search.TermQuery) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) JSONObject(org.json.JSONObject) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer) Term(org.apache.lucene.index.Term) IOException(java.io.IOException)

Example 35 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project HongsCORE by ihongs.

the class SearchQuery method get.

/**
 * Builds a Lucene query for field {@code k} from the string form of {@code v}.
 *
 * <p>The parser uses {@code ana} when it is set and a {@code StandardAnalyzer} otherwise. Unless
 * {@code des} is true, the text is escaped before parsing. When {@code and} is true the default
 * operator becomes AND. Each of {@code epi}, {@code let}, {@code alw}, {@code fpl}, {@code fms}
 * and {@code phr} is applied to the parser only when it is non-null.
 *
 * @param k the default field for the parser
 * @param v the value whose {@code toString()} is parsed
 * @return the parsed query
 * @throws HongsExpedient.Common wrapping the {@code ParseException} when the text cannot be parsed
 */
@Override
public Query get(String k, Object v) {
    final QueryParser parser = new QueryParser(k, ana == null ? new StandardAnalyzer() : ana);

    String text = v.toString();
    if (!Boolean.TRUE.equals(des)) {
        text = QueryParser.escape(text);
    }

    if (Boolean.TRUE.equals(and)) {
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);
    }
    if (epi != null) {
        parser.setEnablePositionIncrements(epi);
    }
    if (let != null) {
        parser.setLowercaseExpandedTerms(let);
    }
    if (alw != null) {
        parser.setAllowLeadingWildcard(alw);
    }
    if (fpl != null) {
        parser.setFuzzyPrefixLength(fpl);
    }
    if (fms != null) {
        parser.setFuzzyMinSim(fms);
    }
    if (phr != null) {
        parser.setPhraseSlop(phr);
    }

    try {
        return parser.parse(text);
    } catch (ParseException ex) {
        throw new HongsExpedient.Common(ex);
    }
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) HongsExpedient(app.hongs.HongsExpedient) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser)73 Query (org.apache.lucene.search.Query)50 IndexSearcher (org.apache.lucene.search.IndexSearcher)32 Document (org.apache.lucene.document.Document)26 IOException (java.io.IOException)24 Analyzer (org.apache.lucene.analysis.Analyzer)21 TopDocs (org.apache.lucene.search.TopDocs)21 IndexReader (org.apache.lucene.index.IndexReader)18 ScoreDoc (org.apache.lucene.search.ScoreDoc)18 ArrayList (java.util.ArrayList)16 ParseException (org.apache.lucene.queryparser.classic.ParseException)16 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)14 BooleanQuery (org.apache.lucene.search.BooleanQuery)14 TermQuery (org.apache.lucene.search.TermQuery)13 ScoredDocuments (io.anserini.rerank.ScoredDocuments)6 Term (org.apache.lucene.index.Term)6 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)6 WildcardQuery (org.apache.lucene.search.WildcardQuery)6 EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer)5 IndexWriter (org.apache.lucene.index.IndexWriter)5