Search in sources :

Example 1 with MultiFieldQueryParser

use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project textdb by TextDB.

the class FuzzyTokenMatcherSourceOperator method createLuceneQueryObject.

public static Query createLuceneQueryObject(FuzzyTokenPredicate predicate) throws DataFlowException {
    try {
            /*
             * By default the boolean query takes 1024 # of clauses as the max
             * limit. Since our input query has no limitation on the number of
             * tokens, we have to put a check.
             */
        if (predicate.getThreshold() > 1024)
            BooleanQuery.setMaxClauseCount(predicate.getThreshold() + 1);
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        MultiFieldQueryParser qp = new MultiFieldQueryParser(predicate.getAttributeNames().stream().toArray(String[]::new), LuceneAnalyzerConstants.getLuceneAnalyzer(predicate.getLuceneAnalyzerStr()));
        for (String s : predicate.getQueryTokens()) {
            builder.add(qp.parse(s), Occur.SHOULD);
    } catch (ParseException e) {
        throw new DataFlowException(e);
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 2 with MultiFieldQueryParser

use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project Anserini by castorini.

the class LookupTopic method search.

  /**
   * Prints query results to the standard output stream.
   *
   * @param queryName the entity name to search
   * @throws Exception on error
   */
public void search(String queryName) throws Exception {"Querying started...");
    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    SimpleAnalyzer analyzer = new SimpleAnalyzer();
    int numHits = 20;
    // find exact title
    QueryParser titleParser = new QueryParser(TopicLuceneDocumentGenerator.FIELD_TITLE, analyzer);
    Query titleQuery = titleParser.parse(queryName);
    TopDocs rs =, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        String resultDoc = String.format("%d - SCORE: %f\nTOPIC_MID: %s\nWIKI_TITLE: %s\nW3_LABEL: %s\n\n", (i + 1), docs.scores[i], docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TOPIC_MID).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TITLE).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_LABEL).stringValue());
    if (docs.documents.length != 0) {
        System.out.println("Exact WIKI_TITLE found! Ending search.");
    } else {
        System.out.println("Exact WIKI_TITLE not found. Searching for the label...");
    // find exact label
    QueryParser labelParser = new QueryParser(TopicLuceneDocumentGenerator.FIELD_LABEL, analyzer);
    Query labelQuery = labelParser.parse(queryName);
    rs =, numHits);
    docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        String resultDoc = String.format("%d - SCORE: %f\nTOPIC_MID: %s\nWIKI_TITLE: %s\nW3_LABEL: %s\n\n", (i + 1), docs.scores[i], docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TOPIC_MID).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TITLE).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_LABEL).stringValue());
    if (docs.documents.length != 0) {
        System.out.println("Exact W3_LABEL found! Ending search.");
    } else {
        System.out.println("Exact W3_LABEL not found. Ranking the topics using BM25 according the text/title/label...");
    float k1 = 1.5f;
    float b = 0.75f;
    Similarity similarity = new BM25Similarity(k1, b);
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new String[] { TopicLuceneDocumentGenerator.FIELD_TITLE, TopicLuceneDocumentGenerator.FIELD_LABEL, TopicLuceneDocumentGenerator.FIELD_TEXT }, analyzer);
    Query query = queryParser.parse(queryName);
    rs =, numHits);
    docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        String resultDoc = String.format("%d - SCORE: %f\nTOPIC_MID: %s\nWIKI_TITLE: %s\nW3_LABEL: %s\n", (i + 1), docs.scores[i], docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TOPIC_MID).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_TITLE).stringValue(), docs.documents[i].getField(TopicLuceneDocumentGenerator.FIELD_LABEL).stringValue());
    }"Querying completed.");
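Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) Similarity(org.apache.lucene.search.similarities.Similarity) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) ScoredDocuments(io.anserini.rerank.ScoredDocuments) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity)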

Example 3 with MultiFieldQueryParser

use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project Anserini by castorini.

the class LookupTopic method search.

/**
 * Prints all known facts about a particular mid.
 *
 * @param queryName query topic name
 * @param numHits number of hits requested from the search
 * @throws Exception on error
 */
public void search(String queryName, int numHits) throws Exception {
    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // search for query in multiple fields
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new String[] { IndexTopics.FIELD_NAME, IndexTopics.FIELD_LABEL, IndexTopics.FIELD_ALIAS }, new SimpleAnalyzer());
    Query query = queryParser.parse(queryName);
    TopDocs rs =, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        String resultDoc = String.format("%d - SCORE: %f\nTOPIC_MID: %s\nOBJECT_NAME: %s\nWIKI_TITLE: %s\nW3_LABEL: %s\n", (i + 1), docs.scores[i], docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue(), docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue(), docs.documents[i].getField(IndexTopics.FIELD_ALIAS).stringValue(), docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue());
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) Query(org.apache.lucene.search.Query) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) ScoredDocuments(io.anserini.rerank.ScoredDocuments)

Example 4 with MultiFieldQueryParser

use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project carbondata by apache.

the class LuceneFineGrainDataMap method prune.

/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();
    // only for test , query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    String[] sFields = new String[fields.size()];
    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
        return null;
    // execute index search
    TopDocs result;
    try {
        result =, MAX_RESULT_NUMBER);
    } catch (IOException e) {
        String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
        throw new IOException(errorMessage);
    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<String, Map<Integer, Set<Integer>>>> mapBlocks = new HashMap<>();
    for (ScoreDoc scoreDoc : result.scoreDocs) {
        // get a document
        Document doc = indexSearcher.doc(scoreDoc.doc);
        // get all fields
        List<IndexableField> fieldsInDoc = doc.getFields();
        // get this block id Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
        String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
        Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlocks.get(blockId);
        if (mapBlocklets == null) {
            mapBlocklets = new HashMap<>();
            mapBlocks.put(blockId, mapBlocklets);
        // get the blocklet id Map<BlockletId, Map<PageId, Set<RowId>>>
        String blockletId = fieldsInDoc.get(BLOCKLETID_ID).stringValue();
        Map<Integer, Set<Integer>> mapPageIds = mapBlocklets.get(blockletId);
        if (mapPageIds == null) {
            mapPageIds = new HashMap<>();
            mapBlocklets.put(blockletId, mapPageIds);
        // get the page id Map<PageId, Set<RowId>>
        Number pageId = fieldsInDoc.get(PAGEID_ID).numericValue();
        Set<Integer> setRowId = mapPageIds.get(pageId.intValue());
        if (setRowId == null) {
            setRowId = new HashSet<>();
            mapPageIds.put(pageId.intValue(), setRowId);
        // get the row id Set<RowId>
        Number rowId = fieldsInDoc.get(ROWID_ID).numericValue();
    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<String, Map<Integer, Set<Integer>>>> mapBlock : mapBlocks.entrySet()) {
        String blockId = mapBlock.getKey();
        Map<String, Map<Integer, Set<Integer>>> mapBlocklets = mapBlock.getValue();
        // for blocklets in this block Map<BlockletId, Map<PageId, Set<RowId>>>
        for (Map.Entry<String, Map<Integer, Set<Integer>>> mapBlocklet : mapBlocklets.entrySet()) {
            String blockletId = mapBlocklet.getKey();
            Map<Integer, Set<Integer>> mapPageIds = mapBlocklet.getValue();
            List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();
            // for pages in this blocklet Map<PageId, Set<RowId>>>
            for (Map.Entry<Integer, Set<Integer>> mapPageId : mapPageIds.entrySet()) {
                // construct array rowid
                int[] rowIds = new int[mapPageId.getValue().size()];
                int i = 0;
                // for rowids in this page Set<RowId>
                for (Integer rowid : mapPageId.getValue()) {
                    rowIds[i++] = rowid;
                // construct one page
                FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
                // add this page into list pages
            // add a FineGrainBlocklet
            blocklets.add(new FineGrainBlocklet(blockId, blockletId, pages));
    return blocklets;
Also used : Document(org.apache.lucene.document.Document) FineGrainBlocklet( MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) IOException( IndexableField(org.apache.lucene.index.IndexableField) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) FineGrainDataMap(

Example 5 with MultiFieldQueryParser

use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project epadd by ePADD.

the class Highlighter method highlight.

private static String highlight(String content, String term, String preTag, String postTag) throws IOException, ParseException, InvalidTokenOffsetsException {
    // The Lucene Highlighter is used in a hacky way here; it is intended to be used to retrieve fragments from a matching Lucene document.
    // The Lucene Highlighter introduces tags around every token that matched the query, hence it is required to merge these fragmented annotations into one in order to fit our needs.
    // To truly differentiate contiguous fragments that match a supplied term, we add a unique id to the pretag, hence the randnum instance
    // TODO: Explain what is happening here
    // Version lv = Indexer.LUCENE_VERSION;
    // hell with reset close, stuff. initialized two analyzers to evade the problem.
    // TODO: get rid of two analyzers.
    Analyzer snAnalyzer, snAnalyzer2;
    snAnalyzer = new EnglishNumberAnalyzer(CharArraySet.EMPTY_SET);
    snAnalyzer2 = new EnglishNumberAnalyzer(CharArraySet.EMPTY_SET);
    Fragmenter fragmenter = new NullFragmenter();
    QueryParser qp = new MultiFieldQueryParser(new String[] { "" }, snAnalyzer2);
    BooleanQuery.Builder querybuilder = new BooleanQuery.Builder();
    TokenStream stream = snAnalyzer.tokenStream(null, new StringReader(content));
    int r = randnum.nextInt();
    String upreTag = preTag.replaceAll(">$", " data-ignore=" + r + " >");
    Formatter formatter = new SimpleHTMLFormatter(upreTag, postTag);
    // Parse exception may occur while parsing terms like "AND", "OR" etc.
    try {
        querybuilder.add(new BooleanClause(qp.parse(term), BooleanClause.Occur.SHOULD));
    } catch (ParseException pe) {
        if (log.isDebugEnabled())
            log.debug("Exception while parsing: " + term, pe);
        return content;
    Scorer scorer = new QueryScorer(; highlighter = new, scorer);
    highlighter.setMaxDocCharsToAnalyze(Math.max(, content.length()));
    String result = highlighter.getBestFragment(stream, content);
    if (result != null) {
        result = mergeContiguousFragments(result, term, upreTag, postTag);
        // and then remove the extra info. we appended to the tags
        result = result.replaceAll(" data-ignore=" + r + " >", ">");
        return result;
    } else
        return content;
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Formatter(org.apache.lucene.search.highlight.Formatter) Analyzer(org.apache.lucene.analysis.Analyzer) StringReader(java.io.StringReader) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) BooleanClause(org.apache.lucene.search.BooleanClause) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException)


MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)32 ParseException (org.apache.lucene.queryparser.classic.ParseException)19 Query ( StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)13 TermQuery ( WildcardQuery ( QueryParser (org.apache.lucene.queryparser.classic.QueryParser)9 IndexSearcher ( ModuleException (it.vige.rubia.ModuleException)8 ResultPage ( Searching ( SortBy ( SortOrder ( IOException ( EntityManager (javax.persistence.EntityManager)8 Term (org.apache.lucene.index.Term)8 Builder ( FullTextQuery ( FullTextSession ( Search.getFullTextSession (