Use of org.apache.lucene.queryparser.classic.QueryParser in the project searchcode-server by boyter: class CodeSearcher, method getRepoDocuments.
/**
 * Returns the indexed file locations for the given repository, one page at a time.
 * Due to very large repositories (500,000 files) this needs to support
 * paging. Also need to consider the fact that this is a list of strings.
 * TODO maybe convert to hash so lookups are faster
 *
 * @param repoName the repository name to search the index for
 * @param page     zero-based page number; each page holds up to 1,000 entries
 * @return file locations for the requested page, or an empty list on error
 */
public List<String> getRepoDocuments(String repoName, int page) {
    final int REPOPAGELIMIT = 1000;
    List<String> fileLocations = new ArrayList<>(REPOPAGELIMIT);
    int start = REPOPAGELIMIT * page;

    // try-with-resources guarantees the reader is closed even when parse/search
    // throws; the original leaked the reader on any exception before close().
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;

        // Clamp the page window to the total number of hits so the last
        // (partial) page does not read past the end of the hit array.
        int end = Math.min(results.totalHits, REPOPAGELIMIT * (page + 1));
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            fileLocations.add(doc.get(Values.PATH));
        }
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getRepoDocuments caught a " + ex.getClass() + " on page " + page + "\n with message: " + ex.getMessage());
    }

    return fileLocations;
}
Use of org.apache.lucene.queryparser.classic.QueryParser in the project searchcode-server by boyter: class CodeSearcher, method getProjectStats.
/**
 * Calculates aggregate statistics for a repository: total code lines, total
 * file count, per-language line counts, and language/owner facets.
 *
 * @param repoName the repository name to aggregate statistics for
 * @return the collected statistics; counts are zero and facets empty on error
 */
public ProjectStats getProjectStats(String repoName) {
    int totalCodeLines = 0;
    int totalFiles = 0;
    List<CodeFacetLanguage> codeFacetLanguages = new ArrayList<>();
    List<CodeFacetOwner> repoFacetOwners = new ArrayList<>();
    List<CodeFacetLanguage> codeByLines = new ArrayList<>();
    SearchcodeLib searchcodeLib = Singleton.getSearchCodeLib();

    // try-with-resources guarantees the reader is closed even when parse/search
    // throws; the original leaked the reader on any exception before close().
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new CodeAnalyzer();
        QueryParser parser = new QueryParser(CODEFIELD, analyzer);
        Query query = parser.parse(Values.REPONAME + ":" + repoName);

        TopDocs results = searcher.search(query, Integer.MAX_VALUE);
        ScoreDoc[] hits = results.scoreDocs;

        // Sum code lines per language, skipping languages the cost model ignores.
        Map<String, Integer> linesCount = new HashMap<>();
        for (int i = 0; i < results.totalHits; i++) {
            Document doc = searcher.doc(hits[i].doc);
            if (!searchcodeLib.languageCostIgnore(doc.get(Values.LANGUAGENAME))) {
                int lines = Singleton.getHelpers().tryParseInt(doc.get(Values.CODELINES), "0");
                totalCodeLines += lines;
                String languageName = doc.get(Values.LANGUAGENAME).replace("_", " ");
                // merge() replaces the containsKey/get/put sequence with one call
                linesCount.merge(languageName, lines, Integer::sum);
            }
        }

        // entrySet() avoids a second map lookup per key
        for (Map.Entry<String, Integer> entry : linesCount.entrySet()) {
            codeByLines.add(new CodeFacetLanguage(entry.getKey(), entry.getValue()));
        }
        // Descending by count; Integer.compare avoids the overflow that the
        // subtraction comparator (b.getCount() - a.getCount()) can produce.
        codeByLines.sort((a, b) -> Integer.compare(b.getCount(), a.getCount()));

        totalFiles = results.totalHits;
        codeFacetLanguages = this.getLanguageFacetResults(searcher, reader, query);
        repoFacetOwners = this.getOwnerFacetResults(searcher, reader, query);
    } catch (Exception ex) {
        LOGGER.severe("CodeSearcher getProjectStats caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return new ProjectStats(totalCodeLines, totalFiles, codeFacetLanguages, codeByLines, repoFacetOwners);
}
Use of org.apache.lucene.queryparser.classic.QueryParser in the project janusgraph by JanusGraph: class LuceneIndex, method query.
/**
 * Executes a raw Lucene query string against the store named by the query and
 * streams back matching document ids with their scores.
 *
 * <p>Note: offset is applied client-side by skipping leading hits, so Lucene
 * still materializes the first {@code offset + limit} results (see TODO below).
 *
 * @param query       raw query holding the store name, query string, offset and limit
 * @param information index metadata used to build the per-store analyzer
 * @param tx          transaction; expected to be a {@code Transaction} providing the searcher
 * @return stream of (docid, score) results; empty when the index does not exist yet
 * @throws BackendException on parse failure (permanent) or I/O failure (temporary)
 */
@Override
public Stream<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
    final Query q;
    try {
        // writers.get(query.getStore()).getAnalyzer();
        final Analyzer analyzer = delegatingAnalyzerFor(query.getStore(), information);
        q = new QueryParser("_all", analyzer).parse(query.getQuery());
        // Lucene query parser does not take additional parameters so any parameters on the RawQuery are ignored.
    } catch (final ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }
    try {
        final IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
        if (searcher == null) {
            // Index does not yet exist
            return Collections.unmodifiableList(new ArrayList<RawQuery.Result<String>>()).stream();
        }
        final long time = System.currentTimeMillis();
        // TODO: can we make offset more efficient in Lucene?
        final int offset = query.getOffset();
        // Ask Lucene for offset + limit hits so the skipped prefix is still
        // available; the arithmetic below saturates at Integer.MAX_VALUE - 1
        // instead of overflowing when limit and offset are both large.
        int adjustedLimit = query.hasLimit() ? query.getLimit() : Integer.MAX_VALUE - 1;
        if (adjustedLimit < Integer.MAX_VALUE - 1 - offset)
            adjustedLimit += offset;
        else
            adjustedLimit = Integer.MAX_VALUE - 1;
        final TopDocs docs = searcher.search(q, adjustedLimit);
        log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
        final List<RawQuery.Result<String>> result = new ArrayList<>(docs.scoreDocs.length);
        // Skip the first `offset` hits; docs with no DOCID field yield a null id.
        for (int i = offset; i < docs.scoreDocs.length; i++) {
            final IndexableField field = searcher.doc(docs.scoreDocs[i].doc).getField(DOCID);
            result.add(new RawQuery.Result<>(field == null ? null : field.stringValue(), docs.scoreDocs[i].score));
    }
        return result.stream();
    } catch (final IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
Aggregations