Search in sources :

Example 6 with ParseException

use of org.apache.lucene.queryparser.classic.ParseException in project jackrabbit-oak by apache.

The class LucenePropertyIndex defines the method getLuceneRequest, shown below.

/**
     * Builds the Lucene request for the given index plan.
     *
     * <p>Combines (in order of precedence): the full-text constraint, a "native"
     * function restriction (mlt / spellcheck / suggest / raw query string), other
     * non-full-text property constraints, and property-existence queries derived
     * from sort-only plans. Spellcheck and suggest short-circuit and return a
     * specialized facade instead of a plain {@code Query}.
     *
     * @param plan index plan containing filter details
     * @param augmentorFactory factory used to look up a {@code FulltextQueryTermsProvider}
     *        that may augment the full-text query
     * @param reader the Lucene reader; may be {@code null}, in which case the
     *        mlt/spellcheck/suggest branches are skipped and a match-all query
     *        may be returned when no other clause applies
     * @return the Lucene request facade wrapping either a plain query or a
     *         spellcheck/suggest helper query
     */
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
    FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
    List<Query> qs = new ArrayList<Query>();
    Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    PlanResult planResult = getPlanResult(plan);
    IndexDefinition defn = planResult.indexDefinition;
    Analyzer analyzer = defn.getAnalyzer();
    if (ft == null) {
    // there might be no full-text constraint
    // when using the LowCostLuceneIndexProvider
    // which is used for testing
    } else {
        qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
    }
    // Check if a native function restriction (e.g. native('lucene', ...)) is supported
    PropertyRestriction pr = null;
    if (defn.hasFunctionDefined()) {
        pr = filter.getPropertyRestriction(defn.getFunctionName());
    }
    if (pr != null) {
        // The native query string decides the branch by its prefix:
        // "mlt?", "spellcheck?", "suggest?", or a raw Lucene query otherwise.
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
        if (query.startsWith("mlt?")) {
            // NOTE(review): replace() removes EVERY occurrence of "mlt?", not just
            // the leading prefix — confirm query bodies can never contain it.
            String mltQueryString = query.replace("mlt?", "");
            if (reader != null) {
                Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                if (moreLikeThis != null) {
                    qs.add(moreLikeThis);
                }
            }
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.replace("spellcheck?", "");
            if (reader != null) {
                // Short-circuit: spellcheck uses its own facade, not a plain Query.
                return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.replace("suggest?", "");
            if (reader != null) {
                // Short-circuit: suggestions use their own facade as well.
                return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            try {
                // No recognized prefix: treat the value as a raw Lucene query string.
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (planResult.evaluateNonFullTextConstraints()) {
        addNonFullTextConstraints(qs, plan, reader);
    }
    if (qs.size() == 0 && plan.getSortOrder() != null) {
        //This case indicates that query just had order by and no
        //property restriction defined. In this case property
        //existence queries for each sort entry
        List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
        for (int i = 0; i < orders.size(); i++) {
            OrderEntry oe = orders.get(i);
            PropertyDefinition pd = planResult.getOrderedProperty(i);
            PropertyRestriction orderRest = new PropertyRestriction();
            orderRest.propertyName = oe.getPropertyName();
            Query q = createQuery(orderRest, pd);
            if (q != null) {
                qs.add(q);
            }
        }
    }
    if (qs.size() == 0) {
        if (reader == null) {
            //just return match all queries
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        // For a node-type restriction alone, a match-all query can also
        //be returned (if the index definition has a single rule)
        if (planResult.evaluateNodeTypeRestriction()) {
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        throw new IllegalStateException("No query created for filter " + filter);
    }
    return performAdditionalWraps(qs);
}
Also used : PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) PlanResult(org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SuggestHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper) ArrayList(java.util.ArrayList) FulltextQueryTermsProvider(org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider) Analyzer(org.apache.lucene.analysis.Analyzer) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) StandardQueryParser(org.apache.lucene.queryparser.flexible.standard.StandardQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.query.fulltext.FullTextExpression) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 7 with ParseException

use of org.apache.lucene.queryparser.classic.ParseException in project lucene-solr by apache.

The class FileBasedQueryMaker defines the method prepareQueries, shown below.

/**
 * Loads benchmark queries from the file (or classpath resource) named by the
 * {@code file.query.maker.file} config property, parsing each non-empty,
 * non-comment ({@code #}-prefixed) line with a classic {@link QueryParser}.
 *
 * <p>Lines that fail to parse are reported to {@code System.err} (with a
 * 1-based line number) and skipped rather than aborting the whole load.
 *
 * @return the parsed queries; empty if no file is configured or readable
 * @throws Exception if the analyzer cannot be created or the file cannot be read
 */
@Override
protected Query[] prepareQueries() throws Exception {
    Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer"));
    String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
    QueryParser qp = new QueryParser(defaultField, anlzr);
    qp.setAllowLeadingWildcard(true);
    List<Query> qq = new ArrayList<>();
    String fileName = config.get("file.query.maker.file", null);
    if (fileName != null) {
        Path path = Paths.get(fileName);
        Reader reader = null;
        // note: we use a decoding reader, so if your queries are screwed up you know
        if (Files.exists(path)) {
            reader = Files.newBufferedReader(path, StandardCharsets.UTF_8);
        } else {
            //see if we can find it as a resource
            InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
            if (asStream != null) {
                reader = IOUtils.getDecodingReader(asStream, StandardCharsets.UTF_8);
            }
        }
        if (reader != null) {
            // try-with-resources closes the wrapper (and the underlying reader)
            // even if readLine/parse throws, replacing the manual finally block.
            try (BufferedReader buffered = new BufferedReader(reader)) {
                String line;
                int lineNum = 0;
                while ((line = buffered.readLine()) != null) {
                    // increment first so error messages report 1-based line numbers
                    // (previously the first line was reported as line 0)
                    lineNum++;
                    line = line.trim();
                    if (!line.isEmpty() && !line.startsWith("#")) {
                        try {
                            qq.add(qp.parse(line));
                        } catch (ParseException e) {
                            System.err.println("Exception: " + e.getMessage() + " occurred while parsing line: " + lineNum + " Text: " + line);
                        }
                    }
                }
            }
        } else {
            System.err.println("No Reader available for: " + fileName);
        }
    }
    return qq.toArray(new Query[qq.size()]);
}
Also used : Path(java.nio.file.Path) Query(org.apache.lucene.search.Query) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 8 with ParseException

use of org.apache.lucene.queryparser.classic.ParseException in project lucene-solr by apache.

The class UserInputQueryBuilder defines the method getQuery, shown below.

/* (non-Javadoc)
    * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
    */
/**
 * Parses the element's text content into a {@link Query} using either the shared
 * (synchronized) parser or a freshly created per-call parser, then applies the
 * element's {@code boost} attribute.
 *
 * @param e XML element whose text is the user query; optional attributes:
 *          {@code fieldName} (default field override) and {@code boost} (default 1.0)
 * @return the parsed query wrapped in a {@link BoostQuery}
 * @throws ParserException if the text cannot be parsed as a query
 */
@Override
public Query getQuery(Element e) throws ParserException {
    String text = DOMUtils.getText(e);
    try {
        Query q = null;
        if (unSafeParser != null) {
            // The shared parser is not thread-safe, so serialize access to it.
            synchronized (unSafeParser) {
                q = unSafeParser.parse(text);
            }
        } else {
            String fieldName = DOMUtils.getAttribute(e, "fieldName", defaultField);
            //Create new parser
            QueryParser parser = createQueryParser(fieldName, analyzer);
            q = parser.parse(text);
        }
        float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
        return new BoostQuery(q, boost);
    } catch (ParseException e1) {
        // Preserve the original exception as the cause so the underlying
        // parse failure's stack trace is not lost (previously only the
        // message was propagated).
        throw new ParserException(e1.getMessage(), e1);
    }
}
Also used : ParserException(org.apache.lucene.queryparser.xml.ParserException) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) BoostQuery(org.apache.lucene.search.BoostQuery) ParseException(org.apache.lucene.queryparser.classic.ParseException) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 9 with ParseException

use of org.apache.lucene.queryparser.classic.ParseException in project Anserini by castorini.

The class TRECScenarioRunnable defines the method run, shown below.

/**
 * Executes one search-and-push iteration for this interest profile: runs a
 * two-pass scored search over the tweet index (title coordinate similarity,
 * then title+expansion similarity filtered to the recent time window),
 * multiplies the two scores, sorts, and pushes qualifying tweets to the API
 * subject to the daily limit and duplicate checks.
 */
@SuppressWarnings("deprecation")
@Override
public void run() {
    LOG.info("Running TRECScenarioSearcher Thread for " + thisInterestProfile.topicIndex);
    try {
        // When the thread wakes up at a new day, clear pushed tweets
        // (scenario B clears unconditionally on every run).
        if ((scenario.equals("A") && Calendar.getInstance(TimeZone.getTimeZone("UTC")).get(Calendar.DAY_OF_YEAR) != now.get(Calendar.DAY_OF_YEAR)) || (scenario.equals("B")))
            pushedTweets.clear();
        Query titleQuery = new QueryParser(TRECIndexerRunnable.StatusField.TEXT.name, Indexer.ANALYZER).parse(thisInterestProfile.titleQueryString());
        LOG.info("Parsed titleQuery " + titleQuery.getClass() + " looks like " + titleQuery.toString() + " " + titleQuery.getClass());
        // Reopen the index to pick up newly indexed tweets.
        // NOTE(review): `reader` is only closed at the end of this try block; if any
        // statement below throws, the reader leaks — consider a finally block.
        reader = DirectoryReader.open(FSDirectory.open(new File(indexPath).toPath()));
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(titleCoordSimilarity);
        // Get the total number of hits
        TotalHitCountCollector totalHitCollector = new TotalHitCountCollector();
        // First search and scoring part: titleCoordSimilarity(q,d) = Nt/T
        searcher.search(titleQuery, totalHitCollector);
        // Create a collector for these hits
        if (totalHitCollector.getTotalHits() > 0) {
            TopScoreDocCollector titleQueryHitCollector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits()));
            searcher.search(titleQuery, titleQueryHitCollector);
            ScoreDoc[] coordHits = titleQueryHitCollector.topDocs().scoreDocs;
            // docId -> first-pass (coordinate) score, used later to re-score pass two.
            HashMap<Integer, Float> coordHMap = new HashMap<Integer, Float>();
            for (ScoreDoc s : coordHits) {
                coordHMap.put(s.doc, s.score);
            }
            LOG.info("Title coordinate similarity has " + totalHitCollector.getTotalHits() + " hits");
            Query titleExpansionQuery = new QueryParser(TRECIndexerRunnable.StatusField.TEXT.name, Indexer.ANALYZER).parse(thisInterestProfile.titleExpansionQueryString(titleBoostFactor, expansionBoostFactor));
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(titleExpansionQuery, BooleanClause.Occur.MUST);
            // Restrict to tweets whose epoch falls within the last `interval` ms.
            Query tweetTimeRangeQuery = LongPoint.newRangeQuery(StatusField.EPOCH.name, (long) (Calendar.getInstance().getTimeInMillis() - interval) / 1000, (long) Calendar.getInstance().getTimeInMillis() / 1000);
            // must satisfy the time window, FILTER clause do not
            // participate in scoring
            bqBuilder.add(tweetTimeRangeQuery, BooleanClause.Occur.FILTER);
            Query q = bqBuilder.build();
            LOG.info("Parsed titleExpansionQuery " + titleExpansionQuery.getClass() + " looks like " + titleExpansionQuery.toString() + " " + titleExpansionQuery.getClass());
            LOG.info("Parsed finalQuery " + q.getClass() + " looks like " + q.toString() + " " + q.getClass());
            searcher.setSimilarity(titleExpansionSimilarity);
            totalHitCollector = new TotalHitCountCollector();
            // Second search and scoring part:
            // titleExpansionSimilarity(q,d)= (We*Ne+Wt*Nt)
            searcher.search(q, totalHitCollector);
            if (totalHitCollector.getTotalHits() > 0) {
                TopScoreDocCollector finalQueryHitCollector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits()));
                searcher.search(q, finalQueryHitCollector);
                ScoreDoc[] hits = finalQueryHitCollector.topDocs().scoreDocs;
                LOG.info("Title expansion similarity has " + totalHitCollector.getTotalHits() + " hits");
                // Re-score (titleExpansionSimilarity multiplied by
                // titleCoordSimilarity)
                // Sort by final score and timestamp (descending order)
                ArrayList<ScoreDocTimestamp> finalHits = new ArrayList<ScoreDocTimestamp>();
                for (int j = 0; j < hits.length; ++j) {
                    int docId = hits[j].doc;
                    // Only docs that matched pass one participate in the final ranking.
                    if (coordHMap.containsKey(docId)) {
                        float docScore = hits[j].score;
                        Document fullDocument = searcher.doc(docId);
                        long timestamp = Long.parseLong(fullDocument.get(TRECIndexerRunnable.StatusField.EPOCH.name));
                        finalHits.add(new ScoreDocTimestamp(docId, docScore * coordHMap.get(docId), timestamp, fullDocument));
                    }
                }
                Collections.sort(finalHits, new ScoreDocComparator());
                LOG.info("Hit " + finalHits.size() + " documents");
                if (0 != finalHits.size()) {
                    LOG.info("Quering:" + titleExpansionQuery.toString() + ", Found " + finalHits.size() + " hits");
                }
                ArrayList<String> tweetList = new ArrayList<String>();
                HashMap<String, Float> scoreMap = new HashMap<String, Float>();
                for (int j = 0; j < finalHits.size(); ++j) {
                    int docId = finalHits.get(j).doc;
                    Document d = finalHits.get(j).fullDocument;
                    // Push only if: under daily limit, not already pushed, not a near-duplicate,
                    // and combined score clears the threshold (>= 6).
                    // NOTE(review): the threshold 6 is a magic number — confirm its origin.
                    if (pushedTweets.size() < dailylimit && !pushedTweets.containsKey(d.get(TRECIndexerRunnable.StatusField.ID.name)) && !isDuplicate(d.get(TRECIndexerRunnable.StatusField.TEXT.name)) && finalHits.get(j).score >= 6) {
                        LOG.info(searcher.explain(titleExpansionQuery, docId).toString());
                        LOG.info("Multiplied by " + coordHMap.get(docId) + " Final score " + finalHits.get(j).score);
                        LOG.info("Raw text " + d.get(TRECIndexerRunnable.StatusField.RAW_TEXT.name) + " " + thisInterestProfile.queryTokenCount);
                        tweetList.add(d.get(TRECIndexerRunnable.StatusField.ID.name));
                        scoreMap.put(d.get(TRECIndexerRunnable.StatusField.ID.name), finalHits.get(j).score);
                        LOG.info("Tweet ID:" + String.valueOf(d.get(TRECIndexerRunnable.StatusField.ID.name)));
                        pushedTweets.put(d.get(TRECIndexerRunnable.StatusField.ID.name), d.get(TRECIndexerRunnable.StatusField.TEXT.name));
                    }
                    // Scenario A stops pushing for the day once the limit is reached.
                    if (scenario.equals("A") && (pushedTweets.size() >= dailylimit)) {
                        shutDown = true;
                        break;
                    }
                }
                if (tweetList.size() > 0) {
                    if (scenario.equals("A"))
                        postTweetListScenarioA(tweetList, api);
                    else if (scenario.equals("B"))
                        postTweetListScenarioB(tweetList, api, scoreMap);
                } else {
                    LOG.info("Nothing interesting today, Gonna sleep for regular interval");
                }
            }
        } else {
            LOG.info("For this iteration, no single tweet hit even only the title field");
        }
        if (scenario.equals("A") && !shutDown) {
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        }
        // Scenario A with the daily limit hit: sleep until just past UTC midnight.
        if (scenario.equals("A") && shutDown) {
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            Calendar tomorrow = Calendar.getInstance();
            tomorrow.set(Calendar.HOUR, 0);
            tomorrow.set(Calendar.MINUTE, 0);
            tomorrow.set(Calendar.SECOND, 0);
            tomorrow.set(Calendar.AM_PM, Calendar.AM);
            tomorrow.set(Calendar.DAY_OF_YEAR, now.get(Calendar.DAY_OF_YEAR) + 1);
            tomorrow.setTimeZone(TimeZone.getTimeZone("UTC"));
            LOG.info("Reached dailyLimit, sleep for the rest of the day");
            LOG.info(tomorrow.getTimeInMillis() + " " + now.getTimeInMillis());
            // Sleep until one minute after the computed midnight.
            Thread.sleep((long) tomorrow.getTimeInMillis() - now.getTimeInMillis() + 60000);
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            shutDown = false;
            LOG.info("Woke up at this new day!");
            pushedTweets.clear();
        }
        reader.close();
    } catch (IOException e) {
        // NOTE(review): errors are only printed, the iteration is silently dropped —
        // prefer LOG.error so failures show up in the application log.
        e.printStackTrace();
    } catch (ParseException e1) {
        // NOTE(review): same as above — a malformed profile query is silently ignored.
        e1.printStackTrace();
    } catch (InterruptedException e) {
        // NOTE(review): swallowing InterruptedException without calling
        // Thread.currentThread().interrupt() loses the interrupt status.
        e.printStackTrace();
    }
}
Also used : ClientBuilder(javax.ws.rs.client.ClientBuilder) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 10 with ParseException

use of org.apache.lucene.queryparser.classic.ParseException in project Anserini by castorini.

The class TweetServlet defines the method doGet, shown below.

/**
 * Handles {@code GET /search}: parses the {@code query} parameter, searches the
 * (freshly reopened) tweet index for the top {@code top} hits (default 20), and
 * renders them through the Mustache template. Any other URI gets a 404.
 *
 * @param request  expects parameters {@code query} (required) and {@code top} (optional)
 * @param response HTML response rendered from the template; 400 on a malformed query
 * @throws IOException if the index cannot be (re)opened or the response cannot be written
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (request.getRequestURI().equals("/search")) {
        response.setContentType("text/html");
        request.setCharacterEncoding("UTF-8");
        Query q;
        try {
            q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(request.getParameter("query"));
        } catch (ParseException e) {
            // A malformed query is a client error: previously this was swallowed with
            // printStackTrace() and the client received an empty 200 response.
            response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
            return;
        }
        // Let IOException propagate (doGet declares it) instead of swallowing it and
        // continuing with a possibly stale or null reader, as the old code did.
        reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        int topN;
        if (request.getParameter("top") != null) {
            topN = Integer.parseInt(request.getParameter("top"));
        } else {
            // TODO configurable, default(parameter unspecified in url) topN = 20
            topN = 20;
        }
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        TweetHits tweetHits = new TweetHits(request.getParameter("query"), hits.length);
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
        }
        // Only report success once the search has actually succeeded.
        response.setStatus(HttpServletResponse.SC_OK);
        MustacheFactory mf = new DefaultMustacheFactory();
        Mustache mustache = mf.compile(MustacheTemplatePath);
        mustache.execute(response.getWriter(), tweetHits).flush();
    } else {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) Mustache(com.github.mustachejava.Mustache) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) MustacheFactory(com.github.mustachejava.MustacheFactory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Aggregations

ParseException (org.apache.lucene.queryparser.classic.ParseException)20 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)12 Query (org.apache.lucene.search.Query)11 IOException (java.io.IOException)9 BooleanQuery (org.apache.lucene.search.BooleanQuery)6 ArrayList (java.util.ArrayList)5 TermQuery (org.apache.lucene.search.TermQuery)5 Analyzer (org.apache.lucene.analysis.Analyzer)4 IndexReader (org.apache.lucene.index.IndexReader)4 IndexSearcher (org.apache.lucene.search.IndexSearcher)4 WildcardQuery (org.apache.lucene.search.WildcardQuery)4 Document (org.apache.lucene.document.Document)3 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)3 PrefixQuery (org.apache.lucene.search.PrefixQuery)3 Map (java.util.Map)2 FullTextExpression (org.apache.jackrabbit.oak.query.fulltext.FullTextExpression)2 PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery)2 MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)2