use of org.apache.lucene.queryparser.classic.QueryParser in project cogcomp-nlp by CogComp.
the class Lucene method getLuceneDocId.
/**
 * Looks up the internal Lucene document number of the first document whose
 * {@code docIdField} contains exactly the term {@code docId}.
 *
 * <p>The lookup uses a {@link TermQuery}, so {@code docId} is matched verbatim and is
 * not passed through any analyzer or query parser.
 *
 * @param reader     index reader to search
 * @param docIdField name of the field that holds the external document id
 * @param docId      external document id to look for
 * @return the Lucene document number of the top hit
 * @throws IOException if the search fails while reading the index
 */
public static int getLuceneDocId(IndexReader reader, String docIdField, String docId) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = new TermQuery(new Term(docIdField, docId));
    // Only the single best hit is needed.
    ScoreDoc[] docs = searcher.search(q, 1).scoreDocs;
    if (docs.length == 0) {
        logger.error("Document with docId : " + docId + " not found!");
        // NOTE(review): terminating the JVM (with status 0) from a lookup helper is
        // surprising; kept because existing callers may rely on it. Consider throwing
        // an exception instead once callers are audited.
        System.exit(0);
        return -1; // only reached if System.exit does not terminate the JVM
    }
    return docs[0].doc;
}
use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.
the class TimeCodeSearcher method getByRepoFileName.
/**
 * Attempts to find a unique file given the repository name and the path/filename.
 *
 * <p>Known issue: the lookup occasionally fails to find files that are in the index. The root
 * cause is unknown; raw queries restricted to the repo return no matches even though the file
 * appears when the repo restriction is dropped. The current workaround is to fetch the file
 * via getById, which is not ideal.
 *
 * <p>TODO investigate the lucene issue that occurs here mentioned above
 * <p>TODO needs to use the revision number here as well to get the right value
 *
 * @param repo     repository name; combined with {@code fileName} as {@code repo + "/" + fileName}
 * @param fileName path/filename inside the repository
 * @return the matching {@link CodeResult}, or {@code null} if nothing matched or the lookup
 *         threw (the exception is logged, not propagated)
 */
public CodeResult getByRepoFileName(String repo, String fileName) {
    CodeResult codeResult = null;
    // try-with-resources guarantees the reader is closed even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // TODO I have a feeling this may not be unique if there are two files in the same directory with different case... something to investigate
        // Escape the location so query-syntax characters in the path are treated literally.
        String queryString = Values.FILELOCATIONFILENAME + ":" + QueryParser.escape(repo + "/" + fileName);
        Query query = parser.parse(queryString);
        Singleton.getLogger().info("Query to get by filename = " + queryString);
        TopDocs results = searcher.search(query, 1);
        ScoreDoc[] hits = results.scoreDocs;
        if (hits.length != 0) {
            Document doc = searcher.doc(hits[0].doc);
            String filepath = doc.get(Values.PATH);
            List<String> code = new ArrayList<>();
            try {
                // The strict UTF-8 read throws on undecodable input, which skips the second
                // read and leaves 'code' empty; presumably this is the binary-file check.
                code = Files.readAllLines(Paths.get(filepath), StandardCharsets.UTF_8);
                code = Singleton.getHelpers().readFileLines(filepath, Singleton.getHelpers().tryParseInt(Properties.getProperties().getProperty(Values.MAXFILELINEDEPTH, Values.DEFAULTMAXFILELINEDEPTH), Values.DEFAULTMAXFILELINEDEPTH));
            } catch (Exception ex) {
                Singleton.getLogger().info("Indexed file appears to binary: " + filepath);
            }
            codeResult = new CodeResult(code, null);
            codeResult.setCodePath(doc.get(Values.FILELOCATIONFILENAME));
            codeResult.setFileName(doc.get(Values.FILENAME));
            codeResult.setLanguageName(doc.get(Values.LANGUAGENAME));
            codeResult.setMd5hash(doc.get(Values.MD5HASH));
            codeResult.setCodeLines(doc.get(Values.CODELINES));
            codeResult.setDocumentId(hits[0].doc);
            codeResult.setRepoName(doc.get(Values.REPONAME));
            codeResult.setRepoLocation(doc.get(Values.REPOLOCATION));
            codeResult.setCodeOwner(doc.get(Values.CODEOWNER));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return codeResult;
}
use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.
the class TimeCodeSearcher method getRepoDocuments.
/**
 * Returns the stored {@code Values.FILELOCATIONFILENAME} value of every indexed document
 * that matches the given repository name.
 *
 * @param repoName repository name to match against the {@code Values.REPONAME} field
 * @return the file locations of all hits; empty (or partially filled) if the lookup threw,
 *         in which case the exception is logged and not propagated
 */
public List<String> getRepoDocuments(String repoName) {
    List<String> fileLocations = new ArrayList<>();
    // try-with-resources guarantees the reader is closed even when parsing or searching throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        // NOTE(review): repoName is not passed through QueryParser.escape, so names containing
        // query-syntax characters may fail to parse or match differently - confirm intent.
        Query query = parser.parse(Values.REPONAME + ":" + repoName);
        // Integer.MAX_VALUE asks for every matching document rather than a single page.
        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        for (ScoreDoc hit : results.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            fileLocations.add(doc.get(Values.FILELOCATIONFILENAME));
        }
    } catch (Exception ex) {
        LOGGER.severe(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return fileLocations;
}
use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.
the class TimeCodeSearcher method search.
/**
 * Given a query and what page of results we are on, returns the matching results for that search.
 *
 * <p>Increments the search counter on every call. The query string is parsed with
 * {@code CODEFIELD} as the default field.
 *
 * @param queryString query in Lucene classic query-parser syntax
 * @param page        page of results to return; passed through to {@code doPagingSearch}
 * @return the results for the requested page, or a freshly constructed {@link SearchResult}
 *         if the search threw (the exception is logged, not propagated)
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();
    // try-with-resources guarantees the reader is closed even when the query fails to parse
    // or the search throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));
        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return searchResult;
}
use of org.apache.lucene.queryparser.classic.QueryParser in project searchcode-server by boyter.
the class CodeSearcher method search.
/**
 * Given a query and what page of results we are on, returns the matching results for that search.
 *
 * <p>Increments the search counter on every call, and writes the parsed query together with
 * the page number to the search log. The query string is parsed with {@code CODEFIELD} as the
 * default field.
 *
 * @param queryString query in Lucene classic query-parser syntax
 * @param page        page of results to return; passed through to {@code doPagingSearch}
 * @return the results for the requested page, or a freshly constructed {@link SearchResult}
 *         if the search threw (the exception is logged, not propagated)
 */
public SearchResult search(String queryString, int page) {
    SearchResult searchResult = new SearchResult();
    statsService.incrementSearchCount();
    // try-with-resources guarantees the reader is closed even when the query fails to parse
    // or the search throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(queryString);
        LOGGER.info("Searching for: " + query.toString(CODEFIELD));
        LOGGER.searchLog(query.toString(CODEFIELD) + " " + page);
        searchResult = this.doPagingSearch(reader, searcher, query, page);
    } catch (Exception ex) {
        LOGGER.warning(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return searchResult;
}
Aggregations