Use of org.apache.lucene.queryparser.classic.QueryParser in project elastic-core-maven by OrdinaryDude.
The class FullTextTrigger, method search:
/**
 * Search the Lucene index
 *
 * The result set will have the following columns:
 *   SCHEMA  - Schema name (String)
 *   TABLE   - Table name (String)
 *   COLUMNS - Primary key column names (String[]) - this is always DB_ID
 *   KEYS    - Primary key values (Long[]) - this is always the DB_ID value for the table row
 *   SCORE   - Lucene score (Float)
 *
 * @param conn       SQL connection
 * @param schema     Schema name
 * @param table      Table name
 * @param queryText  Query expression
 * @param limit      Maximum number of rows to return
 * @param offset     Offset within the result set
 * @return           Search results
 * @throws SQLException Unable to search the index
 */
public static ResultSet search(Connection conn, String schema, String table, String queryText, int limit, int offset) throws SQLException {
    //
    // Get Lucene index access
    //
    getIndexAccess(conn);
    //
    // Create the result set columns
    //
    SimpleResultSet result = new SimpleResultSet();
    result.addColumn("SCHEMA", Types.VARCHAR, 0, 0);
    result.addColumn("TABLE", Types.VARCHAR, 0, 0);
    result.addColumn("COLUMNS", Types.ARRAY, 0, 0);
    result.addColumn("KEYS", Types.ARRAY, 0, 0);
    result.addColumn("SCORE", Types.FLOAT, 0, 0);
    //
    // Perform the search
    //
    // The _QUERY field contains the table and row identification (schema.table;keyName;keyValue)
    // The _TABLE field is used to limit the search results to the current table
    // The _DATA field contains the indexed row data (this is the default search field)
    // The _MODIFIED field contains the row modification time (YYYYMMDDhhmmss) in GMT
    //
    indexLock.readLock().lock();
    try {
        QueryParser parser = new QueryParser("_DATA", analyzer);
        parser.setDateResolution("_MODIFIED", DateTools.Resolution.SECOND);
        parser.setDefaultOperator(QueryParser.Operator.AND);
        Query query = parser.parse("_TABLE:" + schema.toUpperCase() + "." + table.toUpperCase() + " AND (" + queryText + ")");
        TopDocs documents = indexSearcher.search(query, limit);
        ScoreDoc[] hits = documents.scoreDocs;
        int resultCount = Math.min(hits.length, (limit == 0 ? hits.length : limit));
        int resultOffset = Math.min(offset, resultCount);
        for (int i = resultOffset; i < resultCount; i++) {
            Document document = indexSearcher.doc(hits[i].doc);
            String[] indexParts = document.get("_QUERY").split(";");
            String[] nameParts = indexParts[0].split("\\.");
            result.addRow(nameParts[0], nameParts[1],
                          new String[] { indexParts[1] },
                          new Long[] { Long.parseLong(indexParts[2]) },
                          hits[i].score);
        }
    } catch (ParseException exc) {
        Logger.logDebugMessage("Lucene parse exception for query: " + queryText + "\n" + exc.getMessage());
        throw new SQLException("Lucene parse exception for query: " + queryText + "\n" + exc.getMessage());
    } catch (IOException exc) {
        Logger.logErrorMessage("Unable to search Lucene index", exc);
        throw new SQLException("Unable to search Lucene index", exc);
    } finally {
        indexLock.readLock().unlock();
    }
    return result;
}
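Because search has the (Connection, ...) signature that H2 expects of user-defined functions returning a ResultSet, it is meant to be invoked from SQL through a function alias. Below is a minimal sketch of such an invocation; the alias name FT_SEARCH_DATA, the class's package, the database URL, and the CURRENCY table are illustrative assumptions, not taken from the project:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class FullTextSearchExample {
    public static void main(String[] args) throws SQLException {
        // Illustrative H2 URL; FullTextTrigger must be on the classpath.
        try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:test");
             Statement stmt = conn.createStatement()) {
            // Register the static method as an H2 function alias (names are assumptions).
            stmt.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH_DATA "
                    + "FOR \"com.example.FullTextTrigger.search\"");
            // H2 supplies the Connection argument itself; SQL passes the remaining arguments.
            try (ResultSet rs = stmt.executeQuery(
                    "SELECT * FROM FT_SEARCH_DATA('PUBLIC', 'CURRENCY', 'gold AND coin', 10, 0)")) {
                while (rs.next()) {
                    // Columns as documented above: SCHEMA, TABLE, COLUMNS, KEYS, SCORE
                    System.out.printf("%s.%s score=%f%n",
                            rs.getString("SCHEMA"), rs.getString("TABLE"), rs.getFloat("SCORE"));
                }
            }
        }
    }
}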
Use of org.apache.lucene.queryparser.classic.QueryParser in project janusgraph by JanusGraph.
The class LuceneIndex, method totals:
@Override
public Long totals(RawQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
    final Query q;
    try {
        // writers.get(query.getStore()).getAnalyzer();
        final Analyzer analyzer = delegatingAnalyzerFor(query.getStore(), information);
        q = new QueryParser("_all", analyzer).parse(query.getQuery());
    } catch (final ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }
    try {
        final IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
        // Index does not yet exist
        if (searcher == null)
            return 0L;
        final long time = System.currentTimeMillis();
        // Lucene doesn't like limits of 0. Also, it doesn't efficiently build a total list.
        query.setLimit(1);
        // We ignore offset and limit for totals
        final TopDocs docs = searcher.search(q, 1);
        log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
        return docs.totalHits;
    } catch (final IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
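Since only the hit count is needed here, searching with a limit of 1 and reading totalHits works, but IndexSearcher also offers count(Query), which skips scoring and document collection entirely. A sketch of the same total computed that way; the "_all" field name follows the snippet above, while StandardAnalyzer stands in for the delegating analyzer as an illustrative assumption:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class TotalsSketch {
    // Sketch: count matches without collecting documents.
    static long countMatches(IndexSearcher searcher, String rawQuery)
            throws IOException, ParseException {
        Query q = new QueryParser("_all", new StandardAnalyzer()).parse(rawQuery);
        return searcher.count(q); // skips scoring and result collection
    }
}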
Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.
The class IndexUtils, method printTermCounts:
public void printTermCounts(String termStr) throws IOException, ParseException {
    EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
    TermQuery q = (TermQuery) qp.parse(termStr);
    Term t = q.getTerm();
    System.out.println("raw term: " + termStr);
    System.out.println("stemmed term: " + q.toString(LuceneDocumentGenerator.FIELD_BODY));
    System.out.println("collection frequency: " + reader.totalTermFreq(t));
    System.out.println("document frequency: " + reader.docFreq(t));
    PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, t.bytes());
    System.out.println("postings:\n");
    while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
    }
}
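The cast to TermQuery works because parsing a single bare term through the EnglishAnalyzer yields a TermQuery whose term is the analyzed (stemmed) form; the parser is effectively being used as an analysis shortcut. The same stemming can be done with the analyzer directly, as in this sketch (the field name, class name, and example term are illustrative assumptions):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StemSketch {
    // Sketch: obtain the stemmed form from the analyzer instead of
    // round-tripping through QueryParser. Field name "contents" is an assumption.
    public static String stem(String termStr) throws IOException {
        Analyzer analyzer = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
        try (TokenStream ts = analyzer.tokenStream("contents", termStr)) {
            CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            String stemmed = ts.incrementToken() ? termAttr.toString() : termStr;
            ts.end();
            return stemmed;
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(stem("networks")); // prints "network"
    }
}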
Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.
The class Rm3Reranker, method rerank:
@Override
public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
    Preconditions.checkState(docs.documents.length == docs.scores.length);
    IndexSearcher searcher = context.getIndexSearcher();
    IndexReader reader = searcher.getIndexReader();
    FeatureVector qfv = FeatureVector.fromTerms(
            AnalyzerUtils.tokenize(analyzer, context.getQueryText())).scaleToUnitL1Norm();
    FeatureVector rm = estimateRelevanceModel(docs, reader);
    LOG.info("Relevance model estimated.");
    rm = FeatureVector.interpolate(qfv, rm, originalQueryWeight);
    StringBuilder builder = new StringBuilder();
    Iterator<String> terms = rm.iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        double prob = rm.getFeatureWeight(term);
        builder.append(term + "^" + prob + " ");
    }
    String queryText = builder.toString().trim();
    QueryParser p = new QueryParser(field, new WhitespaceAnalyzer());
    Query nq = null;
    try {
        nq = p.parse(queryText);
    } catch (ParseException e) {
        e.printStackTrace();
        return docs;
    }
    LOG.info("Running new query: " + nq);
    TopDocs rs = null;
    try {
        if (context.getFilter() == null) {
            rs = searcher.search(nq, 1000);
        } else {
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(context.getFilter(), BooleanClause.Occur.FILTER);
            bqBuilder.add(nq, BooleanClause.Occur.MUST);
            Query q = bqBuilder.build();
            rs = searcher.search(q, 1000);
        }
    } catch (IOException e) {
        e.printStackTrace();
        return docs;
    }
    return ScoredDocuments.fromTopDocs(rs, searcher);
}
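Note that re-parsing the expansion terms as a "term^weight" query string can throw ParseException when a term contains QueryParser metacharacters, which is why the catch block falls back to the original ranking. An alternative is to assemble the weighted query programmatically, avoiding parsing altogether. A sketch of a helper intended to live inside Rm3Reranker (so that Anserini's FeatureVector type and the field name are visible); the method name is an illustrative assumption:

import java.util.Iterator;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

// Sketch: build the interpolated relevance-model query without string parsing.
static Query buildFeedbackQuery(FeatureVector rm, String field) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    Iterator<String> terms = rm.iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        float weight = (float) rm.getFeatureWeight(term);
        // BoostQuery scales each term's score contribution by its model weight
        builder.add(new BoostQuery(new TermQuery(new Term(field, term)), weight),
                BooleanClause.Occur.SHOULD);
    }
    return builder.build();
}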
Use of org.apache.lucene.queryparser.classic.QueryParser in project Anserini by castorini.
The class EntityLinking, method search:
/**
 * Returns a list of query results.
 *
 * @param queryName the entity name to search
 * @param numHits   the maximum number of hits to return
 * @return a list of top-ranked entities
 * @throws Exception on error
 */
public List<RankedEntity> search(String queryName, int numHits) throws Exception {
    List<RankedEntity> rankedEntities = new ArrayList<>();
    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // do exact search on query name
    QueryParser queryParser = new QueryParser(IndexTopics.FIELD_NAME, new SimpleAnalyzer());
    queryParser.setAutoGeneratePhraseQueries(true);
    queryParser.setPhraseSlop(3);
    queryName = "\"" + queryName + "\"";
    Query query = queryParser.parse(queryName);
    TopDocs rs = searcher.search(query, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }
    if (docs.documents.length >= numHits) {
        return rankedEntities;
    }
    int numHitsLeft = numHits - docs.documents.length;
    // do TFIDF search
    Similarity similarity = new ClassicSimilarity();
    searcher.setSimilarity(similarity);
    queryParser = new MultiFieldQueryParser(
            new String[] { IndexTopics.FIELD_NAME, IndexTopics.FIELD_LABEL }, new SimpleAnalyzer());
    queryParser.setDefaultOperator(QueryParser.Operator.AND);
    query = queryParser.parse(queryName);
    rs = searcher.search(query, numHitsLeft);
    docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }
    return rankedEntities;
}
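The exact-match stage relies on setAutoGeneratePhraseQueries(true) and a phrase slop of 3, so the quoted name must match as a phrase with at most three positional edits before the TF-IDF backfill stage runs. A minimal standalone sketch of that parser configuration; the field name, class name, and query text are illustrative assumptions:

import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class PhraseLookupSketch {
    public static void main(String[] args) throws Exception {
        // Same configuration as the exact-match stage above; field name is an assumption.
        QueryParser parser = new QueryParser("name", new SimpleAnalyzer());
        parser.setAutoGeneratePhraseQueries(true);
        parser.setPhraseSlop(3);
        Query q = parser.parse("\"barack hussein obama\"");
        // Prints a PhraseQuery with the default slop applied: name:"barack hussein obama"~3
        System.out.println(q);
    }
}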