Use of org.apache.lucene.queryparser.classic.ParseException in project jackrabbit-oak by apache.
From the class LucenePropertyIndex, method getLuceneRequest:
/**
 * Get the Lucene query for the given filter.
 *
 * @param plan index plan containing filter details
 * @param augmentorFactory factory supplying the full-text query augmentor for the index
 * @param reader the Lucene reader
 * @return the Lucene query
 */
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
    FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
    List<Query> qs = new ArrayList<Query>();
    Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    PlanResult planResult = getPlanResult(plan);
    IndexDefinition defn = planResult.indexDefinition;
    Analyzer analyzer = defn.getAnalyzer();
    if (ft == null) {
        // there might be no full-text constraint
        // when using the LowCostLuceneIndexProvider
        // which is used for testing
    } else {
        qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
    }
    // check if a native function is supported
    PropertyRestriction pr = null;
    if (defn.hasFunctionDefined()) {
        pr = filter.getPropertyRestriction(defn.getFunctionName());
    }
    if (pr != null) {
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
        if (query.startsWith("mlt?")) {
            String mltQueryString = query.replace("mlt?", "");
            if (reader != null) {
                Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                if (moreLikeThis != null) {
                    qs.add(moreLikeThis);
                }
            }
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.replace("spellcheck?", "");
            if (reader != null) {
                return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.replace("suggest?", "");
            if (reader != null) {
                return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (planResult.evaluateNonFullTextConstraints()) {
        addNonFullTextConstraints(qs, plan, reader);
    }
    if (qs.size() == 0 && plan.getSortOrder() != null) {
        // This case indicates that the query only had an ORDER BY and no
        // property restriction defined. In this case, add property
        // existence queries for each sort entry.
        List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
        for (int i = 0; i < orders.size(); i++) {
            OrderEntry oe = orders.get(i);
            PropertyDefinition pd = planResult.getOrderedProperty(i);
            PropertyRestriction orderRest = new PropertyRestriction();
            orderRest.propertyName = oe.getPropertyName();
            Query q = createQuery(orderRest, pd);
            if (q != null) {
                qs.add(q);
            }
        }
    }
    if (qs.size() == 0) {
        if (reader == null) {
            // just return a match-all query
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        // if only a node type restriction applies, a match-all query can
        // be returned (if the index definition has a single rule)
        if (planResult.evaluateNodeTypeRestriction()) {
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        throw new IllegalStateException("No query created for filter " + filter);
    }
    return performAdditionalWraps(qs);
}
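The core pattern above is worth isolating: QueryParser.parse declares the checked ParseException, and a caller that cannot recover (here, a malformed native query inside a filter) rethrows it unchecked. A minimal standalone sketch of that pattern follows; the class name and empty default field are illustrative, and it assumes a recent Lucene where QueryParser and StandardAnalyzer no longer take a Version argument:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class NativeQueryParsing {
    // parse a query string, converting the checked ParseException into an
    // unchecked exception, as getLuceneRequest does for native queries
    public static Query parseOrFail(String query) {
        QueryParser parser = new QueryParser("", new StandardAnalyzer());
        try {
            return parser.parse(query);
        } catch (ParseException e) {
            // the query is syntactically invalid; nothing sensible to return
            throw new RuntimeException(e);
        }
    }
}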
Use of org.apache.lucene.queryparser.classic.ParseException in project lucene-solr by apache.
From the class FileBasedQueryMaker, method prepareQueries:
@Override
protected Query[] prepareQueries() throws Exception {
    Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer"));
    String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
    QueryParser qp = new QueryParser(defaultField, anlzr);
    qp.setAllowLeadingWildcard(true);
    List<Query> qq = new ArrayList<>();
    String fileName = config.get("file.query.maker.file", null);
    if (fileName != null) {
        Path path = Paths.get(fileName);
        Reader reader = null;
        // note: we use a decoding reader, so if your queries are screwed up, you'll know
        if (Files.exists(path)) {
            reader = Files.newBufferedReader(path, StandardCharsets.UTF_8);
        } else {
            // see if we can find it as a resource
            InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
            if (asStream != null) {
                reader = IOUtils.getDecodingReader(asStream, StandardCharsets.UTF_8);
            }
        }
        if (reader != null) {
            try {
                BufferedReader buffered = new BufferedReader(reader);
                String line = null;
                int lineNum = 0;
                while ((line = buffered.readLine()) != null) {
                    line = line.trim();
                    if (line.length() != 0 && !line.startsWith("#")) {
                        try {
                            qq.add(qp.parse(line));
                        } catch (ParseException e) {
                            System.err.println("Exception: " + e.getMessage() + " occurred while parsing line: " + lineNum + " Text: " + line);
                        }
                    }
                    lineNum++;
                }
            } finally {
                reader.close();
            }
        } else {
            System.err.println("No Reader available for: " + fileName);
        }
    }
    return qq.toArray(new Query[qq.size()]);
}
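In contrast to the Oak example, the benchmark code treats ParseException as recoverable: a bad line is reported and skipped so one malformed query does not abort the run. A minimal sketch of that lenient loop, with illustrative class and method names and the same leading-wildcard setting as the excerpt:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class LenientQueryFile {
    // parse each candidate line, skipping blanks, comments, and unparsable queries
    public static List<Query> parseAll(List<String> lines, String defaultField) {
        QueryParser qp = new QueryParser(defaultField, new StandardAnalyzer());
        qp.setAllowLeadingWildcard(true); // mirrors the benchmark's lenient setting
        List<Query> out = new ArrayList<>();
        int lineNum = 0;
        for (String raw : lines) {
            lineNum++;
            String line = raw.trim();
            if (line.isEmpty() || line.startsWith("#")) {
                continue;
            }
            try {
                out.add(qp.parse(line));
            } catch (ParseException e) {
                // a benchmark run should survive one bad query; report and move on
                System.err.println("Skipping line " + lineNum + ": " + e.getMessage());
            }
        }
        return out;
    }
}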
Use of org.apache.lucene.queryparser.classic.ParseException in project lucene-solr by apache.
From the class UserInputQueryBuilder, method getQuery:
/* (non-Javadoc)
 * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
 */
@Override
public Query getQuery(Element e) throws ParserException {
    String text = DOMUtils.getText(e);
    try {
        Query q = null;
        if (unSafeParser != null) {
            // synchronize on the unsafe parser
            synchronized (unSafeParser) {
                q = unSafeParser.parse(text);
            }
        } else {
            String fieldName = DOMUtils.getAttribute(e, "fieldName", defaultField);
            // create a new parser
            QueryParser parser = createQueryParser(fieldName, analyzer);
            q = parser.parse(text);
        }
        float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
        return new BoostQuery(q, boost);
    } catch (ParseException e1) {
        throw new ParserException(e1.getMessage());
    }
}
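Two details here are easy to miss. First, the classic QueryParser is not thread-safe, which is why the shared ("unsafe") parser is guarded by a synchronized block; creating a parser per call is the lock-free alternative. Second, new ParserException(e1.getMessage()) keeps only the message; passing the original exception as the cause preserves its stack trace. A sketch combining both points, assuming the two-argument QueryParser constructor and using IllegalArgumentException as an illustrative wrapper type:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class PerCallParsing {
    private final Analyzer analyzer;
    private final String defaultField;

    public PerCallParsing(String defaultField, Analyzer analyzer) {
        this.defaultField = defaultField;
        this.analyzer = analyzer;
    }

    // creating a parser per call avoids sharing mutable parser state across threads
    public Query parse(String userInput) {
        QueryParser parser = new QueryParser(defaultField, analyzer);
        try {
            return parser.parse(userInput);
        } catch (ParseException cause) {
            // keep the original exception as the cause instead of only its message
            throw new IllegalArgumentException("Bad query: " + userInput, cause);
        }
    }
}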
Use of org.apache.lucene.queryparser.classic.ParseException in project Anserini by castorini.
From the class TRECScenarioRunnable, method run:
@SuppressWarnings("deprecation")
@Override
public void run() {
    LOG.info("Running TRECScenarioSearcher Thread for " + thisInterestProfile.topicIndex);
    try {
        // when the thread wakes up on a new day, clear the pushed tweets
        if ((scenario.equals("A") && Calendar.getInstance(TimeZone.getTimeZone("UTC")).get(Calendar.DAY_OF_YEAR) != now.get(Calendar.DAY_OF_YEAR)) || (scenario.equals("B")))
            pushedTweets.clear();
        Query titleQuery = new QueryParser(TRECIndexerRunnable.StatusField.TEXT.name, Indexer.ANALYZER).parse(thisInterestProfile.titleQueryString());
        LOG.info("Parsed titleQuery " + titleQuery.getClass() + " looks like " + titleQuery.toString() + " " + titleQuery.getClass());
        reader = DirectoryReader.open(FSDirectory.open(new File(indexPath).toPath()));
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(titleCoordSimilarity);
        // get the total number of hits
        TotalHitCountCollector totalHitCollector = new TotalHitCountCollector();
        // first search and scoring part: titleCoordSimilarity(q,d) = Nt/T
        searcher.search(titleQuery, totalHitCollector);
        // create a collector for these hits
        if (totalHitCollector.getTotalHits() > 0) {
            TopScoreDocCollector titleQueryHitCollector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits()));
            searcher.search(titleQuery, titleQueryHitCollector);
            ScoreDoc[] coordHits = titleQueryHitCollector.topDocs().scoreDocs;
            HashMap<Integer, Float> coordHMap = new HashMap<Integer, Float>();
            for (ScoreDoc s : coordHits) {
                coordHMap.put(s.doc, s.score);
            }
            LOG.info("Title coordinate similarity has " + totalHitCollector.getTotalHits() + " hits");
            Query titleExpansionQuery = new QueryParser(TRECIndexerRunnable.StatusField.TEXT.name, Indexer.ANALYZER).parse(thisInterestProfile.titleExpansionQueryString(titleBoostFactor, expansionBoostFactor));
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(titleExpansionQuery, BooleanClause.Occur.MUST);
            Query tweetTimeRangeQuery = LongPoint.newRangeQuery(StatusField.EPOCH.name, (long) (Calendar.getInstance().getTimeInMillis() - interval) / 1000, (long) Calendar.getInstance().getTimeInMillis() / 1000);
            // the tweet must fall within the time window; FILTER clauses do not
            // participate in scoring
            bqBuilder.add(tweetTimeRangeQuery, BooleanClause.Occur.FILTER);
            Query q = bqBuilder.build();
            LOG.info("Parsed titleExpansionQuery " + titleExpansionQuery.getClass() + " looks like " + titleExpansionQuery.toString() + " " + titleExpansionQuery.getClass());
            LOG.info("Parsed finalQuery " + q.getClass() + " looks like " + q.toString() + " " + q.getClass());
            searcher.setSimilarity(titleExpansionSimilarity);
            totalHitCollector = new TotalHitCountCollector();
            // second search and scoring part:
            // titleExpansionSimilarity(q,d) = (We*Ne + Wt*Nt)
            searcher.search(q, totalHitCollector);
            if (totalHitCollector.getTotalHits() > 0) {
                TopScoreDocCollector finalQueryHitCollector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits()));
                searcher.search(q, finalQueryHitCollector);
                ScoreDoc[] hits = finalQueryHitCollector.topDocs().scoreDocs;
                LOG.info("Title expansion similarity has " + totalHitCollector.getTotalHits() + " hits");
                // re-score (titleExpansionSimilarity multiplied by titleCoordSimilarity),
                // then sort by final score and timestamp (descending order)
                ArrayList<ScoreDocTimestamp> finalHits = new ArrayList<ScoreDocTimestamp>();
                for (int j = 0; j < hits.length; ++j) {
                    int docId = hits[j].doc;
                    if (coordHMap.containsKey(docId)) {
                        float docScore = hits[j].score;
                        Document fullDocument = searcher.doc(docId);
                        long timestamp = Long.parseLong(fullDocument.get(TRECIndexerRunnable.StatusField.EPOCH.name));
                        finalHits.add(new ScoreDocTimestamp(docId, docScore * coordHMap.get(docId), timestamp, fullDocument));
                    }
                }
                Collections.sort(finalHits, new ScoreDocComparator());
                LOG.info("Hit " + finalHits.size() + " documents");
                if (0 != finalHits.size()) {
                    LOG.info("Querying:" + titleExpansionQuery.toString() + ", Found " + finalHits.size() + " hits");
                }
                ArrayList<String> tweetList = new ArrayList<String>();
                HashMap<String, Float> scoreMap = new HashMap<String, Float>();
                for (int j = 0; j < finalHits.size(); ++j) {
                    int docId = finalHits.get(j).doc;
                    Document d = finalHits.get(j).fullDocument;
                    if (pushedTweets.size() < dailylimit && !pushedTweets.containsKey(d.get(TRECIndexerRunnable.StatusField.ID.name)) && !isDuplicate(d.get(TRECIndexerRunnable.StatusField.TEXT.name)) && finalHits.get(j).score >= 6) {
                        LOG.info(searcher.explain(titleExpansionQuery, docId).toString());
                        LOG.info("Multiplied by " + coordHMap.get(docId) + " Final score " + finalHits.get(j).score);
                        LOG.info("Raw text " + d.get(TRECIndexerRunnable.StatusField.RAW_TEXT.name) + " " + thisInterestProfile.queryTokenCount);
                        tweetList.add(d.get(TRECIndexerRunnable.StatusField.ID.name));
                        scoreMap.put(d.get(TRECIndexerRunnable.StatusField.ID.name), finalHits.get(j).score);
                        LOG.info("Tweet ID:" + String.valueOf(d.get(TRECIndexerRunnable.StatusField.ID.name)));
                        pushedTweets.put(d.get(TRECIndexerRunnable.StatusField.ID.name), d.get(TRECIndexerRunnable.StatusField.TEXT.name));
                    }
                    if (scenario.equals("A") && (pushedTweets.size() >= dailylimit)) {
                        shutDown = true;
                        break;
                    }
                }
                if (tweetList.size() > 0) {
                    if (scenario.equals("A"))
                        postTweetListScenarioA(tweetList, api);
                    else if (scenario.equals("B"))
                        postTweetListScenarioB(tweetList, api, scoreMap);
                } else {
                    LOG.info("Nothing interesting today, Gonna sleep for regular interval");
                }
            }
        } else {
            LOG.info("For this iteration, no single tweet hit even only the title field");
        }
        if (scenario.equals("A") && !shutDown) {
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        }
        if (scenario.equals("A") && shutDown) {
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            Calendar tomorrow = Calendar.getInstance();
            tomorrow.set(Calendar.HOUR, 0);
            tomorrow.set(Calendar.MINUTE, 0);
            tomorrow.set(Calendar.SECOND, 0);
            tomorrow.set(Calendar.AM_PM, Calendar.AM);
            tomorrow.set(Calendar.DAY_OF_YEAR, now.get(Calendar.DAY_OF_YEAR) + 1);
            tomorrow.setTimeZone(TimeZone.getTimeZone("UTC"));
            LOG.info("Reached dailyLimit, sleep for the rest of the day");
            LOG.info(tomorrow.getTimeInMillis() + " " + now.getTimeInMillis());
            Thread.sleep((long) tomorrow.getTimeInMillis() - now.getTimeInMillis() + 60000);
            now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            shutDown = false;
            LOG.info("Woke up at this new day!");
            pushedTweets.clear();
        }
        reader.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (ParseException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    } catch (InterruptedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
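The reader handling here deserves a note: DirectoryReader.openIfChanged returns null when the index has not changed, so the code swaps in (and closes) the old reader only when a newer view exists. Since this method opens a fresh reader immediately beforehand, the openIfChanged call is largely a no-op in the excerpt; the usual pattern keeps one long-lived reader and refreshes it, roughly as in this sketch (class name illustrative):

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class ReaderRefresh {
    private DirectoryReader reader;

    public ReaderRefresh(String indexPath) throws IOException {
        reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
    }

    // openIfChanged returns null when the index is unchanged, so only swap
    // (and close the old reader) when a newer view of the index exists
    public DirectoryReader refresh() throws IOException {
        DirectoryReader newer = DirectoryReader.openIfChanged(reader);
        if (newer != null) {
            reader.close();
            reader = newer;
        }
        return reader;
    }
}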
Use of org.apache.lucene.queryparser.classic.ParseException in project Anserini by castorini.
From the class TweetServlet, method doGet:
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_OK);
        response.setContentType("text/html");
        request.setCharacterEncoding("UTF-8");
        Query q;
        try {
            q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(request.getParameter("query"));
            try {
                reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
            IndexSearcher searcher = new IndexSearcher(reader);
            int topN;
            if (request.getParameter("top") != null) {
                topN = Integer.parseInt(request.getParameter("top"));
            } else {
                // TODO: make this configurable; when the "top" parameter is absent from the URL, default to 20
                topN = 20;
            }
            TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            TweetHits tweetHits = new TweetHits(request.getParameter("query"), hits.length);
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
            }
            MustacheFactory mf = new DefaultMustacheFactory();
            Mustache mustache = mf.compile(MustacheTemplatePath);
            mustache.execute(response.getWriter(), tweetHits).flush();
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    } else {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
    }
}
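The servlet gets its visibility of freshly indexed tweets from DirectoryReader.open(indexWriter, true, true), which opens a near-real-time reader over the writer's as-yet-uncommitted state. A minimal sketch of that NRT pattern in isolation, assuming the same three-argument overload used above (class and method names illustrative):

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

public class NrtSearch {
    // open a near-real-time reader that sees documents still buffered in the
    // writer; applyAllDeletes=true (2nd arg) makes deletions visible too
    public static TopDocs search(IndexWriter writer, Query q, int topN) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(writer, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            return searcher.search(q, topN);
        }
    }
}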