
Example 36 with QueryParser

Use of org.apache.lucene.queryparser.classic.QueryParser in project elastic-core-maven by OrdinaryDude.

From the class FullTextTrigger, the search method.

/**
 * Search the Lucene index
 *
 * The result set will have the following columns:
 *   SCHEMA  - Schema name (String)
 *   TABLE   - Table name (String)
 *   COLUMNS - Primary key column names (String[]) - this is always DB_ID
 *   KEYS    - Primary key values (Long[]) - this is always the DB_ID value for the table row
 *   SCORE   - Lucene score (Float)
 *
 * @param   conn                SQL connection
 * @param   schema              Schema name
 * @param   table               Table name
 * @param   queryText           Query expression
 * @param   limit               Number of rows to return
 * @param   offset              Offset within the result set
 * @return                      Search results
 * @throws  SQLException        Unable to search the index
 */
public static ResultSet search(Connection conn, String schema, String table, String queryText, int limit, int offset) throws SQLException {
    // 
    // Get Lucene index access
    // 
    getIndexAccess(conn);
    // 
    // Create the result set columns
    // 
    SimpleResultSet result = new SimpleResultSet();
    result.addColumn("SCHEMA", Types.VARCHAR, 0, 0);
    result.addColumn("TABLE", Types.VARCHAR, 0, 0);
    result.addColumn("COLUMNS", Types.ARRAY, 0, 0);
    result.addColumn("KEYS", Types.ARRAY, 0, 0);
    result.addColumn("SCORE", Types.FLOAT, 0, 0);
    // 
    // Perform the search
    // 
    // The _QUERY field contains the table and row identification (schema.table;keyName;keyValue)
    // The _TABLE field is used to limit the search results to the current table
    // The _DATA field contains the indexed row data (this is the default search field)
    // The _MODIFIED field contains the row modification time (YYYYMMDDhhmmss) in GMT
    // 
    indexLock.readLock().lock();
    try {
        QueryParser parser = new QueryParser("_DATA", analyzer);
        parser.setDateResolution("_MODIFIED", DateTools.Resolution.SECOND);
        parser.setDefaultOperator(QueryParser.Operator.AND);
        Query query = parser.parse("_TABLE:" + schema.toUpperCase() + "." + table.toUpperCase() + " AND (" + queryText + ")");
        // A limit of 0 means "no limit"; Lucene requires numHits > 0, so cap at Integer.MAX_VALUE
        TopDocs documents = indexSearcher.search(query, (limit == 0 ? Integer.MAX_VALUE : limit));
        ScoreDoc[] hits = documents.scoreDocs;
        int resultCount = Math.min(hits.length, (limit == 0 ? hits.length : limit));
        int resultOffset = Math.min(offset, resultCount);
        for (int i = resultOffset; i < resultCount; i++) {
            Document document = indexSearcher.doc(hits[i].doc);
            String[] indexParts = document.get("_QUERY").split(";");
            String[] nameParts = indexParts[0].split("\\.");
            result.addRow(nameParts[0], nameParts[1], new String[] { indexParts[1] }, new Long[] { Long.parseLong(indexParts[2]) }, hits[i].score);
        }
    } catch (ParseException exc) {
        Logger.logDebugMessage("Lucene parse exception for query: " + queryText + "\n" + exc.getMessage());
        throw new SQLException("Lucene parse exception for query: " + queryText + "\n" + exc.getMessage());
    } catch (IOException exc) {
        Logger.logErrorMessage("Unable to search Lucene index", exc);
        throw new SQLException("Unable to search Lucene index", exc);
    } finally {
        indexLock.readLock().unlock();
    }
    return result;
}
Also used : SimpleResultSet(org.h2.tools.SimpleResultSet) Query(org.apache.lucene.search.Query) SQLException(java.sql.SQLException) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException)
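
The comments in the method describe the per-row field layout (_QUERY, _TABLE, _DATA, _MODIFIED). As a rough sketch of the indexing side that search() expects, the following hypothetical helper shows how a table row could be written to the index. The writer, DB_ID value, row text, and the stored/unstored choices are assumptions for illustration, not the project's exact code.

import java.io.IOException;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;

// Hypothetical sketch: index one table row using the field layout described above.
public static void indexRow(IndexWriter writer, String schema, String table, long dbId, String rowText) throws IOException {
    String tableName = schema.toUpperCase() + "." + table.toUpperCase();
    Document doc = new Document();
    // schema.table;keyName;keyValue - search() splits this to rebuild the result row
    doc.add(new StringField("_QUERY", tableName + ";DB_ID;" + dbId, Field.Store.YES));
    // used by the "_TABLE:" clause to restrict results to one table
    doc.add(new StringField("_TABLE", tableName, Field.Store.NO));
    // default search field holding the indexed row data
    doc.add(new TextField("_DATA", rowText, Field.Store.NO));
    // row modification time in GMT, second resolution (matches setDateResolution)
    doc.add(new StringField("_MODIFIED",
            DateTools.timeToString(System.currentTimeMillis(), DateTools.Resolution.SECOND),
            Field.Store.NO));
    writer.addDocument(doc);
}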

Example 37 with QueryParser

Use of org.apache.lucene.queryparser.classic.QueryParser in project janusgraph by JanusGraph.

From the class LuceneIndex, the totals method.

@Override
public Long totals(RawQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
    final Query q;
    try {
        // writers.get(query.getStore()).getAnalyzer();
        final Analyzer analyzer = delegatingAnalyzerFor(query.getStore(), information);
        q = new QueryParser("_all", analyzer).parse(query.getQuery());
    } catch (final ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }
    try {
        final IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
        // Index does not yet exist
        if (searcher == null)
            return 0L;
        final long time = System.currentTimeMillis();
        // Lucene doesn't like limits of 0.  Also, it doesn't efficiently build a total list.
        query.setLimit(1);
        // We ignore offset and limit for totals
        final TopDocs docs = searcher.search(q, 1);
        log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
        return docs.totalHits;
    } catch (final IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer)
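
totals() works around Lucene's dislike of a zero limit by searching with a limit of 1 and reading totalHits off the TopDocs. As a hedged alternative sketch (not what JanusGraph does), a TotalHitCountCollector counts matches without materializing any ScoreDocs; the searcher and query are assumed to come from the surrounding transaction.

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollector;

// Sketch only: count matching documents without collecting any hits.
public static long countHits(IndexSearcher searcher, Query q) throws IOException {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(q, collector);
    return collector.getTotalHits();
}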

Example 38 with QueryParser

Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

From the class IndexUtils, the printTermCounts method.

public void printTermCounts(String termStr) throws IOException, ParseException {
    EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
    TermQuery q = (TermQuery) qp.parse(termStr);
    Term t = q.getTerm();
    System.out.println("raw term:             " + termStr);
    System.out.println("stemmed term:         " + q.toString(LuceneDocumentGenerator.FIELD_BODY));
    System.out.println("collection frequency: " + reader.totalTermFreq(t));
    System.out.println("document frequency:   " + reader.docFreq(t));
    PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, t.bytes());
    System.out.println("postings:\n");
    // getTermDocsEnum returns null when the term does not occur in the index
    if (postingsEnum != null) {
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
        }
    }
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) EnglishAnalyzer(org.apache.lucene.analysis.en.EnglishAnalyzer)
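
The method relies on a small trick: parsing a single word through QueryParser with an EnglishAnalyzer yields a TermQuery whose Term already carries the stemmed token. A minimal sketch of just that step, with an illustrative field name ("contents" is an assumption, not the project's constant):

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.TermQuery;

// Sketch: "searching" analyzed by the EnglishAnalyzer becomes the term contents:search.
public static Term stemmedTerm(String word) throws ParseException {
    QueryParser qp = new QueryParser("contents", new EnglishAnalyzer(CharArraySet.EMPTY_SET));
    TermQuery q = (TermQuery) qp.parse(word);
    return q.getTerm();
}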

Example 39 with QueryParser

Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

From the class Rm3Reranker, the rerank method.

@Override
public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
    Preconditions.checkState(docs.documents.length == docs.scores.length);
    IndexSearcher searcher = context.getIndexSearcher();
    IndexReader reader = searcher.getIndexReader();
    FeatureVector qfv = FeatureVector.fromTerms(AnalyzerUtils.tokenize(analyzer, context.getQueryText())).scaleToUnitL1Norm();
    FeatureVector rm = estimateRelevanceModel(docs, reader);
    LOG.info("Relevance model estimated.");
    rm = FeatureVector.interpolate(qfv, rm, originalQueryWeight);
    StringBuilder builder = new StringBuilder();
    Iterator<String> terms = rm.iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        double prob = rm.getFeatureWeight(term);
        builder.append(term + "^" + prob + " ");
    }
    String queryText = builder.toString().trim();
    QueryParser p = new QueryParser(field, new WhitespaceAnalyzer());
    Query nq = null;
    try {
        nq = p.parse(queryText);
    } catch (ParseException e) {
        e.printStackTrace();
        return docs;
    }
    LOG.info("Running new query: " + nq);
    TopDocs rs = null;
    try {
        if (context.getFilter() == null) {
            rs = searcher.search(nq, 1000);
        } else {
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(context.getFilter(), BooleanClause.Occur.FILTER);
            bqBuilder.add(nq, BooleanClause.Occur.MUST);
            Query q = bqBuilder.build();
            rs = searcher.search(q, 1000);
        }
    } catch (IOException e) {
        e.printStackTrace();
        return docs;
    }
    return ScoredDocuments.fromTopDocs(rs, searcher);
}
Also used : FeatureVector(io.anserini.util.FeatureVector) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) IOException(java.io.IOException) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)
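
rerank() serializes the interpolated relevance model as a "term^weight" string and re-parses it with a WhitespaceAnalyzer. As a hedged alternative sketch (not Anserini's code), the same feedback query could be built programmatically with BoostQuery, skipping the string round-trip; the field name and weight map are illustrative assumptions.

import java.util.Map;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// Sketch only: each expansion term contributes a SHOULD clause boosted by its relevance-model weight.
public static Query feedbackQuery(String field, Map<String, Float> termWeights) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (Map.Entry<String, Float> e : termWeights.entrySet()) {
        builder.add(new BoostQuery(new TermQuery(new Term(field, e.getKey())), e.getValue()),
                BooleanClause.Occur.SHOULD);
    }
    return builder.build();
}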

Example 40 with QueryParser

Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.

From the class EntityLinking, the search method.

/**
 * Returns a list of query results.
 *
 * @param queryName the entity name to search
 * @param numHits   the maximum number of hits to return
 * @return a list of top ranked entities
 * @throws Exception on error
 */
public List<RankedEntity> search(String queryName, int numHits) throws Exception {
    List<RankedEntity> rankedEntities = new ArrayList<>();
    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // do exact search on query name
    QueryParser queryParser = new QueryParser(IndexTopics.FIELD_NAME, new SimpleAnalyzer());
    queryParser.setAutoGeneratePhraseQueries(true);
    queryParser.setPhraseSlop(3);
    queryName = "\"" + queryName + "\"";
    Query query = queryParser.parse(queryName);
    TopDocs rs = searcher.search(query, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }
    if (docs.documents.length >= numHits) {
        return rankedEntities;
    }
    int numHitsLeft = numHits - docs.documents.length;
    // do TFIDF search
    Similarity similarity = new ClassicSimilarity();
    searcher.setSimilarity(similarity);
    queryParser = new MultiFieldQueryParser(new String[] { IndexTopics.FIELD_NAME, IndexTopics.FIELD_LABEL }, new SimpleAnalyzer());
    queryParser.setDefaultOperator(QueryParser.Operator.AND);
    query = queryParser.parse(queryName);
    rs = searcher.search(query, numHitsLeft);
    docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }
    return rankedEntities;
}
Also used : IndexSearcher (org.apache.lucene.search.IndexSearcher) ClassicSimilarity (org.apache.lucene.search.similarities.ClassicSimilarity) Query (org.apache.lucene.search.Query) TermQuery (org.apache.lucene.search.TermQuery) Similarity (org.apache.lucene.search.similarities.Similarity) MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser) SimpleAnalyzer (org.apache.lucene.analysis.core.SimpleAnalyzer) ArrayList (java.util.ArrayList) ScoredDocuments (io.anserini.rerank.ScoredDocuments) TopDocs (org.apache.lucene.search.TopDocs) QueryParser (org.apache.lucene.queryparser.classic.QueryParser)
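
A hypothetical call site (not from the project), assuming an EntityLinking instance named entityLinking has already been constructed over an IndexTopics index; the entity name and hit count are illustrative.

// Hypothetical usage sketch: exact phrase match is tried first, then the TFIDF fallback fills the rest.
List<RankedEntity> entities = entityLinking.search("Barack Obama", 20);
for (RankedEntity entity : entities) {
    System.out.println(entity.toString());
}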

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser) - 73
Query (org.apache.lucene.search.Query) - 50
IndexSearcher (org.apache.lucene.search.IndexSearcher) - 32
Document (org.apache.lucene.document.Document) - 26
IOException (java.io.IOException) - 24
Analyzer (org.apache.lucene.analysis.Analyzer) - 21
TopDocs (org.apache.lucene.search.TopDocs) - 21
IndexReader (org.apache.lucene.index.IndexReader) - 18
ScoreDoc (org.apache.lucene.search.ScoreDoc) - 18
ArrayList (java.util.ArrayList) - 16
ParseException (org.apache.lucene.queryparser.classic.ParseException) - 16
StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer) - 14
BooleanQuery (org.apache.lucene.search.BooleanQuery) - 14
TermQuery (org.apache.lucene.search.TermQuery) - 13
ScoredDocuments (io.anserini.rerank.ScoredDocuments) - 6
Term (org.apache.lucene.index.Term) - 6
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery) - 6
WildcardQuery (org.apache.lucene.search.WildcardQuery) - 6
EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer) - 5
IndexWriter (org.apache.lucene.index.IndexWriter) - 5